home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
SGI Freeware 2002 November
/
SGI Freeware 2002 November - Disc 2.iso
/
dist
/
fw_glimpse.idb
/
usr
/
freeware
/
src
/
glimpse-3.0
/
main.c.z
/
main.c
Wrap
C/C++ Source or Header
|
1997-09-09
|
99KB
|
2,986 lines
/* Copyright (c) 1994 Sun Wu, Udi Manber, Burra Gopal. All Rights Reserved. */
/* bgopal: (1993-4) redesigned/rewritten using agrep's library interface */
#include <sys/param.h>
#include <errno.h>
#include "glimpse.h"
#include "defs.h"
#include <fcntl.h>
#include "checkfile.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/file.h> /* for flock definition */
#if ISO_CHAR_SET
#include <locale.h> /* support for 8bit character set */
#endif
#define CLIENTSERVER 1
#define USE_MSGHDR 0
#define USE_UNIXDOMAIN 0
#define DEBUG 0
#define DEF_SERV_PORT 2001
#define MIN_SERV_PORT 1024
#define MAX_SERV_PORT 30000
#define SERVER_QUEUE_SIZE 10 /* number of requests to buffer up while processing one request = 5 */
/* Borrowed from C-Lib */
extern char **environ;
extern int errno;
#if CLIENTSERVER
#include "communicate.c"
#endif /*CLIENTSERVER*/
/* For client-server protocol */
CHAR SERV_HOST[MAXNAME];
int SERV_PORT;
char glimpse_reqbuf[MAX_ARGS*MAX_NAME_LEN];
extern int glimpse_clientdied; /* set if signal received about dead socket: need agrep variable so that exec() can return quickly */
int glimpse_reinitialize = 0;
/* Borrowed from agrep.c */
extern int D_length; /* global variable in agrep */
extern int D; /* global variable in agrep */
extern int pattern_index;
/* These are used for byte level index search */
extern CHAR CurrentFileName[MAX_LINE_LEN];
extern int SetCurrentFileName;
extern int CurrentByteOffset;
extern int SetCurrentByteOffset;
extern int execfd;
extern int agrep_initialfd;
extern CHAR *agrep_inbuffer;
extern int agrep_inlen;
extern int agrep_inpointer;
extern FILE *agrep_finalfp;
extern CHAR *agrep_outbuffer;
extern int agrep_outlen;
extern int agrep_outpointer;
extern int glimpse_call; /* prevent agrep from printing out its usage */
extern int glimpse_isserver; /* prevent agrep from asking for user input */
int first_search = 1; /* intra/interaction in process_query() and glimpse_search() */
#if ISSERVER && SFS_COMPAT
int RemoteFiles = 0; /* Are the files present locally or remotely? If on, then -NQ is automatically added to all search options for each query */
#endif
/* Borrowed from index/io.c */
extern int OneFilePerBlock;
extern int StructuredIndex;
extern unsigned int *dest_index_set;
extern unsigned char *dest_index_buf;
extern unsigned int *src_index_set;
extern unsigned char *src_index_buf;
extern unsigned char *merge_index_buf;
extern int mask_int[32];
extern int indexable_char[256];
int test_indexable_char[256];
extern int p_table[MAX_PARTITION];
extern int GMAX_WORD_SIZE;
extern int IndexNumber; /* used in getword() */
extern int InterpretSpecial; /* used to "not-split" agrep-regexps */
extern int UseFilters; /* defined in build_in.c, used for filtering routines in io.c */
extern int ByteLevelIndex;
extern int file_num;
extern int REAL_PARTITION, REAL_INDEX_BUF, MAX_ALL_INDEX, FILEMASK_SIZE;
/* Borrowed from get_filename.c */
extern int bigbuffer_size;
extern char *bigbuffer;
extern char *outputbuffer;
/* OPTIONS/FLAGS */
int CONTACT_SERVER = 0; /* Should client try to call server at all or just process query on its own? */
int NOBYTELEVEL = 0; /* Some cases where we cannot do byte level fast-search: ALWAYS 0 if !ByteLevelIndex */
int OPTIMIZEBYTELEVEL = 0; /* Some cases where we don't want to do byte level search since number of files is small */
int GLIMITOUTPUT = 0; /* max no. of output lines: 0=>infinity=default=nolimit */
int GLIMITTOTALFILE = 0; /* max no. of files to match: 0=>infinity=default=nolimit */
int GLIMITPERFILE = 0; /* not used in glimpse */
int GBESTMATCH = 0; /* Should I change -B to -# where # = no. of errors? */
int GRECURSIVE = 0;
int GNOPROMPT = 0;
int GBYTECOUNT = 0;
int GPRINTFILENUMBER = 0;
int GOUTTAIL = 0;
int GFILENAMEONLY = 0; /* how to do it if it is an and expression in structured queries */
int GNOFILENAME=0;
int MATCHFILE = 0;
int PRINTATTR = 0;
int PRINTINDEXLINE = 0;
int Pat_as_is=0;
int Only_first=0; /* Do index search only */
int PRINTAPPXFILEMATCH=0; /* Print places in file where match occurs: useful with -b only to analyse the index */
int GCOUNT=0; /* print number of matches rather than actual matches: used only when PRINTAPPX = 1 */
int HINTSFROMUSER=0; /* The user gives the hints about where we should search (result of adding -EQNgy) */
int WHOLEFILESCOPE=0; /* used only when foundattr is NOT set: otherwise, scope is whole file anyway */
int foundattr=0; /* set in split.c -- != 0 only when StructuredIndex AND query is structured */
/* structured queries */
CHAR ***attr_vals; /* matrix of char pointers: row=max #of attributes, col=max possible values */
CHAR **attr_found; /* did the expression corr. to each value in attr_vals match? */
ParseTree *GParse; /* what kind of expression corr. to attr are we looking for */
/* arbitrary booleans */
ParseTree terminals[MAXNUM_PAT]; /* parse tree's terminal node pointers pt. to elements of this array; also used outside */
char matched_terminals[MAXNUM_PAT]; /* ...[i] is 1 if i'th terminal matched: used in filter_output and eval_tree */
int num_terminals; /* number of terminal patterns */
int ComplexBoolean=0; /* 1 if we need to use parse trees and the eval function */
/* index search */
CHAR *pat_list[MAXNUM_PAT]; /* complete words within global pattern */
int pat_lens[MAXNUM_PAT]; /* their lengths */
int pat_attr[MAXNUM_PAT]; /* set of attributes */
int is_mgrep_pat[MAXNUM_PAT];
int mgrep_pat_index[MAXNUM_PAT];
int num_mgrep_pat;
CHAR pat_buf[(MAXNUM_PAT + 2)*MAXPAT];
int pat_ptr = 0;
extern char INDEX_DIR[MAX_LINE_LEN];
char TEMP_DIR[MAX_LINE_LEN];
char indexnumberbuf[256]; /* to read in first few lines of the index */
char *index_argv[MAX_ARGS];
int index_argc = 0;
int bestmatcherrors=0; /* set during index search, used later on */
int patindex;
int patbufpos = -1;
char tempfile[MAX_NAME_LEN];
/* agrep search */
char *agrep_argv[MAX_ARGS];
int agrep_argc = 0;
CHAR *FileOpt; /* the option list after -F */
int fileopt_length;
CHAR GPattern[MAXPAT];
int GM;
CHAR APattern[MAXPAT];
int AM;
CHAR GD_pattern[MAXPAT];
int GD_length;
CHAR **GTextfiles;
CHAR **GTextfilenames;
int *GFileIndex;
int GNumfiles;
int GNumpartitions;
CHAR GProgname[MAXNAME];
/* persistent file descriptors */
#if BG_DEBUG
FILE *debug; /* file descriptor for debugging output */
#endif /*BG_DEBUG*/
FILE *indexfp = NULL; /* glimpse index */
FILE *partfp = NULL; /* glimpse partitions */
FILE *minifp = NULL; /* glimpse turbo */
FILE *nullfp = NULL; /* to discard output: agrep -s doesn't work properly */
int svstdin = 0, svstdout = 1, svstderr = 2;
/* Index manipulation */
struct offsets **src_offset_table;
struct offsets **multi_dest_offset_table[MAXNUM_PAT];
unsigned int *multi_dest_index_set[MAXNUM_PAT];
extern free_list();
struct stat index_stat_buf, file_stat_buf;
/* Direct agrep access for bytelevel-indices */
extern int COUNT, INVERSE, TCOMPRESSED, NOFILENAME, POST_FILTER, OUTTAIL, BYTECOUNT,
LIMITOUTPUT, LIMITPERFILE, LIMITTOTALFILE, PRINTRECORD, DELIMITER, SILENT, FILENAMEONLY, num_of_matched, prev_num_of_matched, FILEOUT;
CHAR matched_region[MAX_REGION_LIMIT*2 + MAXPATT*2];
int RegionLimit=DEFAULT_REGION_LIMIT;
/*
 * Search one file of a byte-level index by visiting only the byte offsets
 * recorded for it, instead of scanning the whole file.
 *
 * For every node in the list src_offset_table[fileindex] the record that
 * surrounds the offset is located in the file (using the delimiter
 * GD_pattern/GD_length), copied into the global buffer matched_region, and
 * handed to memagrep_search() together with the pattern APattern/AM and outfp.
 * Uses agrep's options to output stuff nicely.
 *
 * Parameters (note: AM, APattern, GD_length, GD_pattern and src_offset_table
 * shadow the file-scope variables of the same names):
 *   AM, APattern            pattern length and pattern passed to memagrep_search()
 *   GD_length, GD_pattern   record delimiter and its length
 *   realfilename            name passed to file_out() when FILEOUT is set
 *   filename                file that is actually opened and read
 *   fileindex               index of this file's offset list in src_offset_table
 *   src_offset_table        per-file linked lists of offsets
 *   outfp                   stream for matches and for the COUNT line
 *
 * Returns the number of records for which the search reported a match
 * (each matching record counts once), 0 if the file cannot be opened, or -1
 * when the search fails with errno == AGREP_ERROR.
 *
 * Side effects: every list node that is visited is unlinked and released with
 * my_free(); nodes not yet reached when the loop stops early stay in the list.
 * CurrentByteOffset, SetCurrentByteOffset, first_search and TCOMPRESSED are
 * modified.
 */
int
glimpse_search(AM, APattern, GD_length, GD_pattern, realfilename, filename, fileindex, src_offset_table, outfp)
int AM;
unsigned char APattern[];
int GD_length;
unsigned char GD_pattern[];
char *realfilename;
char *filename;
int fileindex;
struct offsets *src_offset_table[];
FILE *outfp;
{
FILE *infp;
/* sig is referenced only inside the "#if 0" section below */
char sig[SIGNATURE_LEN];
struct offsets **p1, *tp1;
/* c is used only by the DEBUG printout */
CHAR *text, *curtextend, *curtextbegin, c;
int times;
int num, ret, totalret = 0;
/* [begininterval, endinterval) = file offsets of the record handled last; prevoffset = offset that "text" currently stands for */
int prevoffset = 0, begininterval = 0, endinterval = -1;
/* [beginregionptr, endregionptr) = part of matched_region that currently holds file data */
CHAR *beginregionptr = 0, *endregionptr = 0;
/* [beginpage, endpage) = file offsets that the buffered region corresponds to */
int beginpage = 0, endpage = -1;
/* MAXTIMES is used only by the disabled "#else" implementation */
static int MAXTIMES, MAXPGTIMES, pagesize;
static int first_time = 1;
/*
* If can't open file for read, quit
* For each offset for that file:
* seek to that point
* go back until delimiter, go forward until delimiter, output it: MAX_REGION_LIMIT is 16K on either side.
* read in units of RegionLimit
* before outputting matched record, use options to put prefixes (or use memagrep which does everything?)
* Algorithm changed: don't read same page in twice.
*/
/* One-time setup: page size and the maximum number of read iterations in each direction (at least 1) */
if (first_time) {
pagesize = DISKBLOCKSIZE;
MAXTIMES = ((MAX_REGION_LIMIT / RegionLimit) > 1) ? (MAX_REGION_LIMIT / RegionLimit) : 1;
MAXPGTIMES = ((MAX_REGION_LIMIT / pagesize) > 1) ? (MAX_REGION_LIMIT / pagesize) : 1;
first_time = 0;
}
/* Safety: must end/begin with delim */
/* A copy of the delimiter is placed at the very start of matched_region and just past the data area, so delimiter scans always terminate */
memcpy(matched_region, GD_pattern, GD_length);
memcpy(matched_region+MAXPATT+2*MAX_REGION_LIMIT, GD_pattern, GD_length);
/* text points to the middle of the data area; it is the buffer position of the offset being examined */
text = &matched_region[MAX_REGION_LIMIT+MAXPATT];
if ((infp = fopen(filename, "r")) == NULL) return 0;
#if 0
/* Cannot search in .CZ files since offset computations will be incorrect */
TCOMPRESSED = ON;
if (!tuncompressible_filename(file_list[i], strlen(file_list[i]))) TCOMPRESSED = OFF;
num_read = fread(sig, 1, SIGNATURE_LEN, infp);
if ((TCOMPRESSED == ON) && tuncompressible(sig, num_read)) {
EASYSEARCH = sig[SIGNATURE_LEN-1];
if (!EASYSEARCH) {
fprintf(stderr, "not compressed for easy-search: can miss some matches in: %s\n", CurrentFileName); /* not filename!!! */
}
}
else TCOMPRESSED = OFF;
#endif /*0*/
/* p1 always addresses the head pointer of this file's list; nodes are removed from the head as they are handled */
p1 = &src_offset_table[fileindex];
while (*p1 != NULL) {
/* An offset inside the record that was just searched needs no second search: drop the node */
if ( (begininterval <= (*p1)->offset) && (endinterval > (*p1)->offset) ) { /* already covered this area */
#if DEBUG
printf("ignoring %d in [%d,%d]\n", (*p1)->offset, begininterval, endinterval);
#endif /*DEBUG*/
tp1 = *p1;
*p1 = (*p1)->next;
my_free(tp1, sizeof(struct offsets));
continue;
}
TCOMPRESSED = OFF;
#if 1
/* Fast path: the offset lies in the pages already buffered, so only move text by the distance from the previous offset */
if ( (beginpage <= (*p1)->offset) && (endpage >= (*p1)->offset) && (text + ((*p1)->offset - prevoffset) + GD_length < endregionptr)) {
/* beginregionptr = curtextend - GD_length; /* prevent next curtextbegin to go behind previous curtextend (!) */
text += ((*p1)->offset - prevoffset);
prevoffset = (*p1)->offset;
/* If either delimiter search runs into the edge of the buffered data, the record is not fully buffered: re-read from the file */
if (!((curtextend = forward_delimiter(text, endregionptr, GD_pattern, GD_length, 1)) < endregionptr))
goto fresh_read;
if (!((curtextbegin = backward_delimiter(text, beginregionptr, GD_pattern, GD_length, 0)) > beginregionptr))
goto fresh_read;
}
else { /* NOT within an area already read: must read another page: if record overlapps page, might read page twice: no time to fix */
fresh_read:
prevoffset = (*p1)->offset;
text = &matched_region[MAX_REGION_LIMIT+MAXPATT]; /* middle: points to occurrence of pattern */
/* Start with an empty region positioned at the page boundary at or below the offset */
endpage = beginpage = ((*p1)->offset / pagesize) * pagesize;
/* endpage = (((*p1)->offset + pagesize) / pagesize) * pagesize */
endregionptr = beginregionptr = text - ((*p1)->offset - beginpage); /* overlay physical place starting from this logical point */
/* endregionptr = text + (endpage - (*p1)->offset); */
curtextbegin = curtextend = text;
times = 0;
/* Grow the region forwards, at most one page per iteration, until the closing delimiter is found, the buffer is full, the read fails or MAXPGTIMES is reached */
while (times < MAXPGTIMES) {
fseek(infp, endpage, 0);
/* never read past the end of the data area of matched_region */
num = (&matched_region[MAX_REGION_LIMIT*2+MAXPATT] - endregionptr < pagesize) ? (&matched_region[MAX_REGION_LIMIT*2+MAXPATT] - endregionptr) : pagesize;
if ((num = fread(endregionptr, 1, num, infp)) <= 0) break;
endpage += num;
endregionptr += num;
if (endregionptr <= text) {
curtextend = text; /* error in value of offset: file was modified and offsets no longer true: your RISK! */
break;
}
if (((curtextend = forward_delimiter(text, endregionptr, GD_pattern, GD_length, 1)) < endregionptr) ||
(endregionptr >= &matched_region[MAX_REGION_LIMIT*2 + MAXPATT])) break;
times ++;
}
times = 0;
/* Grow the region backwards the same way until the opening delimiter is found, the start of the file is reached or no buffer space is left */
while (times < MAXPGTIMES) { /* I have already read the initial page since endpage is beginpage initially */
if ((curtextbegin = backward_delimiter(text, beginregionptr, GD_pattern, GD_length, 0)) > beginregionptr) break;
if (beginpage > 0) {
/* a full page would reach below &matched_region[MAXPATT]: use only the space that is left, or stop if there is none */
if (beginregionptr - pagesize < &matched_region[MAXPATT]) {
if ((num = beginregionptr - &matched_region[MAXPATT]) <= 0) break;
}
else num = pagesize;
beginpage -= num;
beginregionptr -= num;
}
else break;
times ++;
fseek(infp, beginpage, 0);
/* NOTE(review): the result of this fread() is not checked */
fread(beginregionptr, 1, num, infp);
}
}
#else /*1*/
/* Disabled older implementation that reads in units of RegionLimit around the offset */
/* Find forward delimiter (including delimiter) */
times = 0;
fseek(infp, (*p1)->offset, 0);
while (times < MAXTIMES) {
if ((num = fread(text+RegionLimit*times, 1, RegionLimit, infp)) > 0)
curtextend = forward_delimiter(text, text+RegionLimit*times+num, GD_pattern, GD_length, 1);
if ((curtextend < text+RegionLimit*times+num) || (num < RegionLimit)) break;
times ++;
}
/* Find backward delimiter (including delimiter) */
times = 0;
while (times < MAXTIMES) {
num = ((*p1)->offset - RegionLimit*(times+1)) > 0 ? ((*p1)->offset - RegionLimit*(times+1)) : 0;
fseek(infp, num, 0);
if (num > 0) {
fread(text-RegionLimit*(times+1), 1, RegionLimit, infp);
curtextbegin = backward_delimiter(text, text-RegionLimit*(times+1), GD_pattern, GD_length, 0);
}
else {
fread(text-RegionLimit*times-(*p1)->offset, 1, (*p1)->offset, infp);
curtextbegin = backward_delimiter(text, text-RegionLimit*times-(*p1)->offset, GD_pattern, GD_length, 0);
}
if ((num <= 0) || (curtextbegin > text-RegionLimit*(times+1))) break;
times ++;
}
#endif /*1*/
/* set interval and delete the entry */
/* translate the record boundaries from buffer positions back to file offsets */
begininterval = (*p1)->offset - (text - curtextbegin);
endinterval = (*p1)->offset + (curtextend - text);
if (strncmp(curtextbegin, GD_pattern, GD_length)) {
/* always pass enclosing delimiters to agrep; since we have seen text before curtextbegin + we have space, we can overwrite */
memcpy(curtextbegin - GD_length, GD_pattern, GD_length);
curtextbegin -= GD_length;
}
#if DEBUG
c = *curtextend;
*curtextend = '\0';
printf("%s [%d < %d < %d], text = %d: %s\n", CurrentFileName, begininterval, (*p1)->offset, endinterval, text, curtextbegin);
*curtextend = c;
#endif /*DEBUG*/
tp1 = *p1;
*p1 = (*p1)->next;
my_free(tp1, sizeof(struct offsets));
if (curtextend <= curtextbegin) continue; /* error in offsets/delims */
/*
* Don't call memagrep since that is heavy weight. Call exec
* directly after doing agrep_search()'s preprocessing here.
* PS: can add agrep variable not to do delim search if called from here
* since that prevents unnecessarily scanning the buffer for the 2nd time.
*/
CurrentByteOffset = begininterval+1;
SetCurrentByteOffset = 1;
/* first_search is forced to 1 right before it is tested, so the memagrep_search() branch is always taken and the else branch below never runs */
first_search = 1;
if (first_search) {
if ((ret = memagrep_search(AM, APattern, curtextend-curtextbegin, curtextbegin, 0, outfp)) > 0)
totalret ++; /* += ret */
else if ((ret < 0) && (errno == AGREP_ERROR)) {
fclose(infp);
return -1;
}
first_search = 0;
}
else { /* All agrep globals are properly set: has a bug because agrep's globals aren't properly reinitialized without agrep_search :-( */
agrep_finalfp = (FILE *)outfp;
agrep_outlen = 0;
agrep_outbuffer = NULL;
agrep_outpointer = 0;
execfd = agrep_initialfd = -1;
agrep_inbuffer = curtextbegin;
agrep_inlen = curtextend - curtextbegin;
agrep_inpointer = 0;
if ((ret = exec(-1, NULL)) > 0)
totalret ++; /* += ret; */
else if ((ret < 0) && (errno == AGREP_ERROR)) {
fclose(infp);
return -1;
}
}
/* Stop early once the global or per-file output limit is reached, or after the first match when only file names are wanted */
if (((LIMITOUTPUT > 0) && (LIMITOUTPUT <= num_of_matched)) ||
((LIMITPERFILE > 0) && (LIMITPERFILE <= num_of_matched - prev_num_of_matched))) break; /* done */
if ((totalret > 0) && FILENAMEONLY) break;
}
SetCurrentByteOffset = 0;
fclose(infp);
/* Per-file summary output: a count line when COUNT is set without FILEOUT, otherwise file_out() when FILEOUT is set */
if (totalret > 0) { /* dirty solution: must handle part of agrep here */
if (COUNT && !FILEOUT) {
if(!NOFILENAME) fprintf(outfp, "%s: %d\n", CurrentFileName, totalret);
else fprintf(outfp, "%d\n", totalret);
}
else if (FILEOUT) {
file_out(realfilename);
}
}
return totalret;
}
/*
 * Change into the index directory, open the glimpse index files and load the
 * index-wide settings.
 *
 * indexdir: directory that holds the index (buffer of MAXNAME bytes). If it
 *           is the empty string it is filled in with $HOME or, when HOME is
 *           not set, with the current working directory.
 *
 * On success the globals indexfp and partfp are open (minifp is open if the
 * turbo file exists), INDEX_DIR holds the directory in use, and IndexNumber,
 * ByteLevelIndex, OneFilePerBlock, StructuredIndex, WHOLEFILESCOPE and
 * GNumpartitions reflect the index header.
 *
 * Returns 0 on success and -1 when a required file cannot be opened/stat'ed
 * or the attribute file cannot be read. Files that were opened before a
 * failure stay recorded in the global FILE pointers so that CLEANUP can close
 * them; a stream closed here is reset to NULL so it is never closed twice.
 */
int
read_index(indexdir)
char indexdir[MAXNAME];
{
	char *home;
	int ret;

	if (indexdir[0] == '\0') {
		if ((home = (char *)getenv("HOME")) == NULL) {
			/* getcwd() leaves the buffer undefined on failure: fall back to "." */
			if (getcwd(indexdir, MAXNAME-1) == NULL) strcpy(indexdir, ".");
			fprintf(stderr, "using working-directory '%s' to locate index\n", indexdir);
		}
		else {
			/* strncpy() does not terminate the copy when HOME has MAXNAME or more characters */
			strncpy(indexdir, home, MAXNAME-1);
			indexdir[MAXNAME-1] = '\0';
		}
	}

	/* All index files are opened by relative name, so work inside the index directory */
	ret = chdir(indexdir);
	if (getcwd(INDEX_DIR, MAXNAME-1) == NULL) strcpy(INDEX_DIR, indexdir);
	if (ret < 0) {
		fprintf(stderr, "using working-directory '%s' to locate index\n", INDEX_DIR);
	}

	indexfp = fopen(INDEX_FILE, "r");
	if(indexfp == NULL) {
		fprintf(stderr, "can't open glimpse index-file %s/%s\n", INDEX_DIR, INDEX_FILE);
		fprintf(stderr, "(use -H to give an index-directory or run 'glimpseindex' to make an index)\n");
		return -1;
	}
	if (stat(INDEX_FILE, &index_stat_buf) == -1) {
		fprintf(stderr, "can't stat %s/%s\n", INDEX_DIR, INDEX_FILE);
		fclose(indexfp);
		indexfp = NULL;	/* callers run CLEANUP, which would otherwise fclose() it again */
		return -1;
	}

	partfp = fopen(P_TABLE, "r");
	if(partfp == NULL) {
		fprintf(stderr, "can't open glimpse partition-table %s/%s\n", INDEX_DIR, P_TABLE);
		fprintf(stderr, "(use -H to specify an index-directory or run glimpseindex to make an index)\n");
		return -1;
	}

	/* Get options */
#if BG_DEBUG
	debug = fopen(DEBUG_FILE, "w+");
	if(debug == NULL) {
		fprintf(stderr, "can't open file %s/%s, errno=%d\n", INDEX_DIR, DEBUG_FILE, errno);
		return(-1);
	}
#endif /*BG_DEBUG*/

	/* First line of the index: contains "1234567890" when numbers were indexed */
	if (fgets(indexnumberbuf, sizeof(indexnumberbuf), indexfp) == NULL) indexnumberbuf[0] = '\0';
	if(strstr(indexnumberbuf, "1234567890")) IndexNumber = ON;
	else IndexNumber = OFF;

	/*
	 * Second line "%<n>": a negative n marks a byte-level index (the sign is
	 * dropped afterwards), n == 0 means the partition table must be loaded.
	 * The variable keeps its previous value if the line cannot be parsed.
	 */
	fscanf(indexfp, "%%%d\n", &OneFilePerBlock);
	if (OneFilePerBlock < 0) {
		ByteLevelIndex = ON;
		OneFilePerBlock = -OneFilePerBlock;
	}
	else if (OneFilePerBlock == 0) {
		GNumpartitions = get_table(P_TABLE, p_table, MAX_PARTITION, 0);
	}

	/* Third line "%<n>": a positive n marks a structured index */
	fscanf(indexfp, "%%%d\n", &StructuredIndex);
	/* Set WHOLEFILESCOPE for do-it-yourself request processing at client */
	WHOLEFILESCOPE = 1;
	if (StructuredIndex <= 0) {
		WHOLEFILESCOPE = 0;
		StructuredIndex = 0;
		PRINTATTR = 0; /* doesn't make sense: must not go into filter_output */
	}
	else if (-1 == (StructuredIndex = attr_load_names(ATTRIBUTE_FILE))) {
		fprintf(stderr, "error in reading attribute file %s/%s\n", INDEX_DIR, ATTRIBUTE_FILE);
		return(-1);
	}
#if BG_DEBUG
	fprintf(debug, "buf = %s OneFilePerBlock=%d StructuredIndex=%d\n", indexnumberbuf, OneFilePerBlock, StructuredIndex);
#endif /*BG_DEBUG*/

	/* The turbo file is optional: minifp simply stays NULL when it is missing */
	minifp = fopen(MINI_FILE, "r");
	/* if (minifp==NULL && OneFilePerBlock) fprintf(stderr, "Can't open for reading: %s/%s --- cannot do very fast search\n", INDEX_DIR, MINI_FILE); */
	if (OneFilePerBlock && glimpse_isserver && (minifp != NULL)) read_mini(indexfp, minifp);
	read_filenames();

	/* Once IndexNumber info is available */
	set_indexable_char(indexable_char);
	set_indexable_char(test_indexable_char);
	set_special_char(indexable_char);
	return 0;
}
/*
 * Release the resources held in this file's globals:
 *  - close indexfp, partfp, minifp and nullfp if they are open and reset all four to NULL;
 *  - when ByteLevelIndex is set, call free_list() on each of the first OneFilePerBlock
 *    entries of src_offset_table and of every non-NULL multi_dest_offset_table[] row;
 *  - when StructuredIndex is set, call attr_free_table();
 *  - call destroy_filename_hashtable().
 * The macro expands to a brace-enclosed block that declares its own locals q and k.
 * NOTE(review): because it is a bare { } block rather than do { } while (0), writing
 * "if (x) CLEANUP; else ..." would not parse.
 */
#define CLEANUP \
{\
int q, k;\
if (indexfp != NULL) fclose(indexfp);\
if (partfp != NULL) fclose(partfp);\
if (minifp != NULL) fclose(minifp);\
if (nullfp != NULL) fclose(nullfp);\
indexfp = partfp = minifp = nullfp = NULL;\
if (ByteLevelIndex) {\
if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {\
free_list(&src_offset_table[k]);\
}\
for (q=0; q<MAXNUM_PAT; q++) {\
if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {\
free_list(&multi_dest_offset_table[q][k]);\
}\
}\
}\
if (StructuredIndex) {\
attr_free_table();\
}\
destroy_filename_hashtable();\
}
/*
 * Called whenever we get SIGUSR2/SIGHUP (at the end of process_query()).
 * In the code visible in this file the call is made from the accept loop in
 * main() once the reinitialize() handler has set glimpse_reinitialize.
 *
 * The compiled path runs CLEANUP and then re-executes the program image with
 * execve(argv[0], argv, environ), so the server starts again from main() with
 * its original arguments. The function therefore returns only if execve()
 * fails, and the value returned is execve()'s result.
 *
 * argc is not used; the locals i and fd are referenced only by the disabled
 * "#if 0" variant, which re-read the index in place instead of re-executing.
 * NOTE(review): main() already runs CLEANUP right before calling this
 * function, so CLEANUP is executed twice on that path.
 */
reinitialize_server(argc, argv)
int argc;
char **argv;
{
int i, fd;
CLEANUP;
#if 0
init_filename_hashtable();
region_initialize();
indexfp = partfp = minifp = nullfp = NULL;
if ((nullfp = fopen("/dev/null", "w")) == NULL) {
return(-1);
}
src_offset_table = NULL;
for (i=0; i<MAXNUM_PAT; i++) multi_dest_offset_table[i] = NULL;
if (-1 == read_index(INDEX_DIR)) return(-1);
#if 0
#ifndef LOCK_UN
#define LOCK_UN 8
#endif
if ((fd = open(INDEX_DIR, O_RDONLY)) == -1) return -1;
flock(fd, LOCK_UN);
close(fd);
#endif
return 0;
#else
/* replace the running process image; control comes back here only on failure */
return execve(argv[0], argv, environ);
#endif
}
/* MUST CARE IF PIPE/SOCKET IS BROKEN! ALSO SIGUSR1 (hardy@cs.colorado.edu) => QUIT CURRENT REQUEST. */
/*
 * Table indexed by signal number: main() installs SIG_IGN for every i in
 * 0..31 whose entry is non-zero. Entries 3 through 8 and entry 29 are set.
 * NOTE(review): which signals those numbers denote depends on the platform's
 * <signal.h> numbering -- confirm when porting.
 */
int ignore_signal[32] = { 0,
0, 0, 1, 1, 1, 1, 1, 1, /* all the tracing stuff: since default action is to dump core */
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0 }; /* resource lost: since default action is to dump core */
/*
 * Shutdown handler installed by main() with signal(): report on stderr,
 * release the index resources, close descriptors 0..63 and exit with status 3.
 * Closing every descriptor is what keeps the server's sockets from
 * persisting after the process is gone.
 */
void
cleanup()
{
	int fd = 0;

	/*
	 * ^C in the middle of a client call: descriptor 2 may currently be the
	 * client's socket, so bring back the saved server stderr before printing.
	 */
	if (svstderr != 2) {
		close(2);
		dup(svstderr);
	}

	fprintf(stderr, "server cleaning up...\n");
	CLEANUP;

	while (fd < 64) {
		close(fd);
		fd++;
	}
	exit(3);
}
/*
 * Signal handler: only records that a re-initialisation was requested.
 * main()'s accept loop tests glimpse_reinitialize after do_select() and then
 * calls reinitialize_server().
 *
 * s: number of the signal being handled.
 */
void
reinitialize(s)
int s;
{
	glimpse_reinitialize = 1;	/* picked up by the main while-loop */

#ifdef __svr4__
	/* Solaris 2.3 drops the handler after delivery: install it again */
	(void)signal(s, reinitialize);
#endif
}
#define QUITREQUESTMSG "glimpseserver: aborting request...\n"
/*
 * Signal handler that aborts the request currently being served, so that one
 * request doesn't keep the server occupied too long when the client has
 * already quit. It sets glimpse_clientdied, which the search code polls.
 *
 * s: number of the signal being handled. For any signal other than SIGUSR1
 *    a one-line notice is written to the saved server stderr (svstderr), but
 *    only the first time for a given request.
 *
 * errno is saved on entry and restored on exit: the write() (and signal()
 * under __svr4__) below may change it, and the interrupted code tests errno
 * right after the call that was interrupted.
 */
void quitrequest(s)
int s;
{
	int saved_errno = errno;

	/*
	 * Don't write onto stderr, since 2 is duped to sockfd => can cause recursive signal!
	 * Also, don't print error message more than once for quitting one request. The
	 * server receives signals for EVERY write it attempts when it finds a match: I could
	 * not find a way to prevent it, but agrep/bitap.c/fill_buf() was fixed to limit it.
	 * -- bg on 16th Feb 1995
	 */
	if (!glimpse_clientdied && (s != SIGUSR1)) /* USR1 is a "friendly" cleanup message */
		write(svstderr, QUITREQUESTMSG, sizeof(QUITREQUESTMSG) - 1);
	glimpse_clientdied = 1;
#ifdef __svr4__
	/* Solaris 2.3 insists that you reset the signal handler */
	(void)signal(s, quitrequest);
#endif
	errno = saved_errno;
}
main(argc, argv)
int argc;
char *argv[];
{
int ret, tried = 0;
char indexdir[MAXNAME];
char **oldargv = argv;
int oldargc = argc;
#if CLIENTSERVER
int sockfd, newsockfd, clilen, len, clpid;
int clout;
#if USE_UNIXDOMAIN
struct sockaddr_un cli_addr, serv_addr;
#else /*USE_UNIXDOMAIN*/
struct sockaddr_in cli_addr, serv_addr;
struct hostent *hp;
#endif /*USE_UNIXDOMAIN*/
int cli_len;
int clargc;
char **clargv;
int clstdin, clstdout, clstderr;
int i;
char array[4];
char *p, c;
#endif /*CLIENTSERVER*/
int quitwhile;
#if ISO_CHAR_SET
setlocale(LC_ALL,""); /* support for 8bit character set: ew@senate.be, Henrik.Martin@eua.ericsson.se */
#endif
#if CLIENTSERVER && ISSERVER
glimpse_isserver = 1; /* I am the server */
#else /*CLIENTSERVER && ISSERVER*/
if (argc <= 1) return(usage()); /* Client nees at least 1 argument */
#endif /*CLIENTSERVER && ISSERVER*/
#define RETURNMAIN(val)\
{\
CLEANUP;\
return (val);\
}
/* once-only initialization */
init_filename_hashtable();
src_offset_table = NULL;
for (i=0; i<MAXNUM_PAT; i++) multi_dest_offset_table[i] = NULL;
gethostname(SERV_HOST, MAXNAME - 2);
SERV_PORT = DEF_SERV_PORT;
srand(getpid());
umask(077);
strcpy(&GProgname[0], argv[0]);
region_initialize();
indexfp = partfp = minifp = nullfp = NULL;
if ((nullfp = fopen("/dev/null", "w")) == NULL) {
fprintf(stderr, "%s: cannot open for writing: /dev/null, errno=%d\n", argv[0], errno);
RETURNMAIN(-1);
}
InterpretSpecial = ON;
GMAX_WORD_SIZE = MAXPAT;
#if CLIENTSERVER
#if !ISSERVER
/* Check if client has too many arguments: then it is surely running as agrep since I have < half those options! */
if (argc > MAX_ARGS) goto doityourself;
#endif /*!ISSERVER*/
while((--argc > 0) && (*++argv)[0] == '-' ) {
p = argv[0] + 1; /* ptr to first character after '-' */
c = *(argv[0]+1);
quitwhile = OFF;
while (!quitwhile && (*p != '\0')) {
c = *p;
switch(c) {
/* Look for -H option at server (only one that makes sense); if client has a -H, then it goes to doityourself */
case 'H' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);
RETURNMAIN(usageS());
}
argv ++;
strcpy(indexdir, argv[0]);
argc --;
}
else {
strcpy(indexdir, p+1);
}
quitwhile = ON;
break;
/* Recognized by both client and server */
case 'J' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);
#if ISSERVER
RETURNMAIN(usageS());
#else /*ISSERVER*/
RETURNMAIN(usage());
#endif /*ISSERVER*/
}
argv ++;
strcpy(SERV_HOST, argv[0]);
argc --;
}
else {
strcpy(SERV_HOST, p+1);
}
quitwhile = ON;
break;
/* Recognized by both client and server */
case 'K' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: the server port must follow the -C option\n", GProgname);
#if ISSERVER
RETURNMAIN(usageS());
#else /*ISSERVER*/
RETURNMAIN(usage());
#endif /*ISSERVER*/
}
argv ++;
SERV_PORT = atoi(argv[0]);
argc --;
}
else {
SERV_PORT = atoi(p+1);
}
if ((SERV_PORT < MIN_SERV_PORT) || (SERV_PORT > MAX_SERV_PORT)) {
fprintf(stderr, "Bad server port %d: must be in [%d, %d]: using default %d\n",
SERV_PORT, MIN_SERV_PORT, MAX_SERV_PORT, DEF_SERV_PORT);
SERV_PORT = DEF_SERV_PORT;
}
quitwhile = ON;
break;
#if ISSERVER
#if SFS_COMPAT
case 'R' :
RemoteFiles = ON;
break;
case 'Z' :
/* No op */
break;
#endif
/* server cannot recognize any other option */
default :
fprintf(stderr, "%s: server cannot recognize option: '%s'\n", GProgname, p);
RETURNMAIN(usageS());
#else /*ISSERVER*/
/* These have 1 argument each, so must do quitwhile */
case 'd' :
case 'e' :
case 'k' :
case 'D' :
case 'F' :
case 'I' :
case 'L' :
case 'R' :
case 'S' :
case 'T' :
case 'Y' :
if (argv[0][2] == '\0') {/* space after - option */
if(argc <= 1) {
fprintf(stderr, "%s: the '-%c' option must have an argument\n", GProgname, c);
RETURNMAIN(usage());
}
argv++;
argc--;
}
quitwhile = ON;
break;
/* These are illegal */
case 'f' :
case 'm' :
case 'p' :
case 'v' :
fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
RETURNMAIN(usage());
/* They can't be patterns and filenames since they start with a -, these don't have arguments */
case 'a' :
case 'b' :
case 'c' :
case 'h' :
case 'i' :
case 'l' :
case 'n' :
case 'o' :
case 'q' :
case 'r' :
case 's' :
case 't' :
case 'u' :
case 'g' :
case 'w' :
case 'x' :
case 'y' :
case 'z' :
case 'A' :
case 'B' :
case 'E' :
case 'G' :
case 'M' :
case 'N' :
case 'O' :
case 'P' :
case 'Q' :
case 'U' :
case 'W' :
case 'Z' :
break;
case 'C':
CONTACT_SERVER = 1;
break;
case 'V' :
printf("\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
RETURNMAIN(0);
default :
if (isdigit(c)) quitwhile = ON;
else {
fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
RETURNMAIN(usage());
}
break;
#endif /*ISSERVER*/
} /* switch(c) */
p ++;
}
}
#if !ISSERVER
/* Next arg must be the pattern: Check if the user wants to run the client as agrep, or doesn't want to contact the server */
if ((argc > 1) || (!CONTACT_SERVER)) goto doityourself;
#endif /*!ISSERVER*/
argv = oldargv;
argc = oldargc;
#endif /*CLIENTSERVER*/
#if ISSERVER && CLIENTSERVER
if (-1 == read_index(indexdir)) RETURNMAIN(ret);
for (i=0; i<32; i++)
if (ignore_signal[i]) signal(i, SIG_IGN);
signal(SIGHUP, cleanup);
signal(SIGINT, cleanup);
if (((void (*)())-1 == signal(SIGPIPE, quitrequest)) ||
((void (*)())-1 == signal(SIGUSR1, quitrequest)) ||
#ifndef SCO
((void (*)())-1 == signal(SIGURG, quitrequest)) ||
#endif
((void (*)())-1 == signal(SIGUSR2, reinitialize)) ||
((void (*)())-1 == signal(SIGHUP, reinitialize))) {
/* Check for return values here since they ensure reliability */
fprintf(stderr, "glimpseserver: Unable to install signal-handlers.\n");
RETURNMAIN(-1);
}
#if USE_UNIXDOMAIN
if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
fprintf(stderr, "server cannot open socket for communication.\n");
RETURNMAIN(-1);
}
unlink("/tmp/.glimpse_server");
memset((char *)&serv_addr, '\0', sizeof(serv_addr));
serv_addr.sun_family = AF_UNIX;
strcpy(serv_addr.sun_path, "/tmp/.glimpse_server"); /* < 108 ! */
len = strlen(serv_addr.sun_path) + sizeof(serv_addr.sun_family);
#else /*USE_UNIXDOMAIN*/
if ((sockfd = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
perror("glimpseserver: Cannot create socket");
RETURNMAIN(-1);
}
memset((char *)&serv_addr, '\0', sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(SERV_PORT);
#if 0
/* use host-names not internet style d.d.d.d notation */
serv_addr.sin_addr.s_addr = htonl(INADDR_ANY);
#else
/*
* We only want to accept connections from glimpse clients
* on the SERV_HOST, do not use INADDR_ANY!
*/
if ((hp = gethostbyname(SERV_HOST)) == NULL) {
perror("glimpseserver: Cannot resolve host");
RETURNMAIN(-1);
}
memcpy((caddr_t)&serv_addr.sin_addr, hp->h_addr, hp->h_length);
#endif /*0*/
len = sizeof(serv_addr);
#endif /*USE_UNIXDOMAIN*/
if (bind(sockfd, (struct sockaddr *)&serv_addr, len) < 0) {
perror("glimpseserver: Cannot bind to socket");
RETURNMAIN(-1);
}
listen(sockfd, SERVER_QUEUE_SIZE);
printf("glimpseserver: On-line (pid = %d, port = %d) waiting for request...\n", getpid(), SERV_PORT);
fflush(stdout); /* must fflush to print on server stdout */
while (1) {
/*
* Spin until sockfd is ready to do a non-blocking accept(2).
* We only wait for 15 seconds, because SunOS may
* swap us out if we block for 20 seconds or more.
* -- Courtesy: Darren Hardy, hardy@cs.colorado.edu
*/
if ((ret = do_select(sockfd, 15)) == 0) {
if ((errno == EINTR) && glimpse_reinitialize) {
glimpse_reinitialize = 0;
CLEANUP;
close(sockfd);
sleep(IC_PORTRELEASE);
reinitialize_server(oldargc, oldargv);
}
continue;
}
else if (ret != 1) continue;
/* get parameters */
ret = 0;
clargc = 0;
clargv = NULL;
cli_len = sizeof(cli_addr);
if ((newsockfd = accept(sockfd, &cli_addr, &cli_len)) < 0) continue;
if (getreq(newsockfd, glimpse_reqbuf, &clstdin, &clstdout, &clstderr, &clargc, &clargv, &clpid) < 0) {
ret = -1;
#if DEBUG
printf("getreq errno: %d\n", errno);
#endif /*DEBUG*/
goto end_process;
}
#if DEBUG
printf("server processing request on %x\n", newsockfd);
#endif /*DEBUG*/
/*
* Server doesn't wait for response, no point using
svstdin = dup(0);
close(0);
dup(clstdin);
close(clstdin);
*/
/*
* This is wrong since clstderr == clstdout!
svstdout = dup(1);
close(1);
dup(clstdout);
close(clstdout);
svstderr = dup(2);
close(2);
dup(clstderr);
close(clstderr);
*/
svstdout = dup(1);
svstderr = dup(2);
close(1);
close(2);
dup(clstdout);
dup(clstderr);
close(clstdout);
close(clstderr);
/*
* IMPORTANT: Unbuffered I/O to the client!
* Done for Harvest since partial results might be
* needed and fflush will not flush partial results
* to the client if we type ^C and kill it: it puts
* them into /dev/null. This way, output is unbuffered
* and the client sees at least some results if killed.
*/
setbuf(stdout, NULL);
setbuf(stderr, NULL);
glimpse_call = 0;
glimpse_clientdied = 0;
ret = process_query(clargc, clargv, newsockfd);
/*
* Server doesn't wait for response, no point using
close(0);
dup(svstdin);
close(svstdin);
svstdin = 0;
*/
if (glimpse_clientdied) {
/*
* This code is *ONLY* used as a safety net now.
* The old problem was that users would see portions
* of previous (and usually) unrelated queries!
* glimpseserver now uses unbuffered I/O to the
* client so all previous fwrite's to now are
* gone. But since this is such a nasty problem
* we flush stdout to /dev/null just in case.
*/
clout = open("/dev/null", O_WRONLY);
close(1);
dup(clout);
close(clout);
fflush(stdout);
}
/* Restore svstdout and svstdout to stdout/stderr */
close(1);
dup(svstdout);
close(svstdout);
svstdout = 1;
close(2);
dup(svstderr);
close(svstderr);
svstderr = 2;
end_process:
#if USE_MSGHDR
/* send reply and cleanup */
array[0] = (ret & 0xff000000) >> 24;
array[1] = (ret & 0xff0000) >> 16;
array[2] = (ret & 0xff00) >> 8;
array[3] = (ret & 0xff);
writen(newsockfd, array, 4);
#endif /*USE_MSGHDR*/
#if DEBUG
write(1, "done\n", 5);
#endif /*DEBUG*/
for (i=0; i<clargc; i++)
if (clargv[i] != NULL) my_free(clargv[i], 0);
if (clargv != NULL) my_free(clargv, 0);
close(newsockfd); /* if !USE_MSGHDR, client directly reads from socket and writes onto stdout until EOF */
}
#else /*ISSERVER && CLIENTSERVER*/
#if CLIENTSERVER
trynewsocket:
#if USE_UNIXDOMAIN
if ((sockfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
perror("socket");
goto doityourself;
}
memset((char *)&serv_addr, '\0', sizeof(serv_addr));
serv_addr.sun_family = AF_UNIX;
strcpy(serv_addr.sun_path, "/tmp/.glimpse_server"); /* < 108 ! */
len = strlen(serv_addr.sun_path) + sizeof(serv_addr.sun_family);
#else /*USE_UNIXDOMAIN*/
if ((sockfd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
perror("socket");
goto doityourself;
}
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(SERV_PORT);
#if 0
/* use host-names not internet style d.d.d.d notation */
serv_addr.sin__addr.s_addr = inet_addr(SERV_HOST);
#else /*0*/
if ((hp = gethostbyname(SERV_HOST)) == NULL) {
fprintf(stderr, "gethostbyname (%s) failed\n", SERV_HOST);
goto doityourself;
}
memcpy((caddr_t)&serv_addr.sin_addr, hp->h_addr, hp->h_length);
#endif /*0*/
len = sizeof(serv_addr);
#endif /*USE_UNIXDOMAIN*/
if (connect(sockfd, (struct sockaddr *)&serv_addr, len) < 0) {
char errbuf[4096];
sprintf(errbuf, "glimpse: Cannot contact glimpseserver: %s, port %d:", SERV_HOST, SERV_PORT);
perror(errbuf);
/* perror(SERV_HOST); */
#if DEBUG
printf("connect errno: %d\n", errno);
#endif /*DEBUG*/
close(sockfd);
if ((errno == ECONNREFUSED) && (tried < 4)) {
tried ++;
goto trynewsocket;
}
goto doityourself;
}
if (sendreq(sockfd, glimpse_reqbuf, fileno(stdin), fileno(stdout), fileno(stderr), argc, argv, getpid()) < 0) {
perror("sendreq");
#if DEBUG
printf("sendreq errno: %d\n", errno);
#endif /*DEBUG*/
close(sockfd);
goto doityourself;
}
#if USE_MSGHDR
if (readn(sockfd, array, 4) != 4) {
close(sockfd);
goto doityourself;
}
ret = (array[0] << 24) + (array[1] << 16) + (array[2] << 8) + array[3];
#else /*USE_MSGHDR*/
{
/*
* Dump everything the server writes into the socket onto
* stdout until EOF/error. Do this in a way so that *everything*
* the server sends is dumped to stdout by the client. The
* client might die suddenly via ^C or SIGTERM, but we still
* want the results.
*/
char tmpbuf[1024];
int n;
while ((n = read(sockfd, tmpbuf, 1024)) > 0) {
write(fileno(stdout), tmpbuf, n);
}
}
#endif /*USE_MSGHDR*/
close(sockfd);
RETURNMAIN(ret);
doityourself:
#if DEBUG
printf("doing it myself :-(\n");
#endif /*DEBUG*/
#endif /*CLIENTSERVER*/
setbuf(stdout, NULL); /* Unbuffered I/O to always get every result */
setbuf(stderr, NULL);
glimpse_call = 0;
glimpse_clientdied = 0;
ret = process_query(oldargc, oldargv, fileno(stdin));
RETURNMAIN(ret);
#endif /*ISSERVER && CLIENTSERVER*/
}
process_query(argc, argv, newsockfd)
int argc;
char *argv[];
int newsockfd;
{
int searchpercent;
int num_blocks;
int num_read;
int i, j;
int iii; /* Udi */
int jjj;
char c;
char *p;
int ret;
int jj;
int quitwhile;
char indexdir[MAX_LINE_LEN];
char TEMP_FILE[MAX_LINE_LEN];
char temp_file[MAX_LINE_LEN];
int oldargc = argc;
char **oldargv = argv;
CHAR dummypat[MAX_PAT];
int dummylen=0;
int my_M_index, my_P_index, my_b_index, my_A_index, my_l_index = -1, my_B_index = -1;
char **outname;
int gnum_of_matched = 0;
int gprev_num_of_matched = 0;
int gfiles_matched = 0;
int foundpat = 0;
int wholefilescope=0;
int nobytelevelmustbeon=0;
if ((argc <= 0) || (argv == NULL)) {
errno = EINVAL;
return -1;
}
/*
 * RETURN(val): destroy EVERYTHING that process_query() built for the current
 * query and then return val. This is done because we might want to make this
 * a library function later on (the server already calls process_query() once
 * per request).
 *
 * Convention: after an object is destroyed it is set to NULL, and all such
 * objects are set to NULL at initialization time, so running this cleanup on
 * an object that was never (re)built is harmless.
 *
 * What is released:
 *   - every agrep_argv[] / index_argv[] slot, FileOpt and every pat_list[] slot;
 *   - the temporary files TEMP_DIR/.glimpse_tmp.<pid> and
 *     TEMP_DIR/.glimpse_apply.<pid> (unlink errors are ignored);
 *   - the local outname buffer and its one-element vector;
 *   - for a byte-level index, the per-file offset lists (the tables that
 *     hold them are kept and zeroed again by the next query);
 *   - the values of the parsed terminals, the parse tree if the query was a
 *     complex boolean, and the GTextfiles[] name strings.
 *
 * Notes:
 *   - val is evaluated only in the final return statement, i.e. AFTER the
 *     cleanup has run (so RETURN(usage()) prints the usage last).
 *   - The expansion is a plain brace block that declares its own q and k.
 *   - my_free calls have 2nd parameter = 0 if the size is not easily
 *     determinable.
 *   - DO agrep_argv, index_argv and FileOpt my_malloc/my_free optimizations
 *     later.
 */
#define RETURN(val) \
{\
    int q,k;\
\
    first_search = 0;\
    for (k=0; k<MAX_ARGS; k++) {\
        if (agrep_argv[k] != NULL) my_free(agrep_argv[k], 0);\
        if (index_argv[k] != NULL) my_free(index_argv[k], 0);\
        agrep_argv[k] = index_argv[k] = NULL;\
    }\
    if (FileOpt != NULL) my_free(FileOpt, MAXFILEOPT);\
    FileOpt = NULL;\
    for (k=0; k<MAXNUM_PAT; k++) {\
        if (pat_list[k] != NULL) my_free(pat_list[k], 0);\
        pat_list[k] = NULL;\
    }\
    sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());\
    unlink(tempfile);\
    sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());\
    unlink(outname[0]);\
    my_free(outname[0], 0);\
    my_free(outname, 0);\
\
    if (ByteLevelIndex) {\
        if (src_offset_table != NULL) for (k=0; k<OneFilePerBlock; k++) {\
            free_list(&src_offset_table[k]);\
        }\
        /* Don't make src_offset_table itself NULL: it will be bzero-d below if !NULL */\
        for (q=0; q<MAXNUM_PAT; q++) {\
            if (multi_dest_offset_table[q] != NULL) for (k=0; k<OneFilePerBlock; k++) {\
                free_list(&multi_dest_offset_table[q][k]);\
            }\
            /* Don't make multi_dest_offset_table[q] itself NULL: it will be bzero-d below if !NULL */\
        }\
    }\
    for (k=0; k<num_terminals;k++) {\
        free(terminals[k].data.leaf.value);\
        /* num_terminals/terminals are only reset on the index-search path, so a later */\
        /* call that RETURNs before that point would free these pointers a second time: */\
        /* NULL them here (free(NULL) is a no-op) as the convention above requires. */\
        terminals[k].data.leaf.value = NULL;\
    }\
    if (ComplexBoolean) destroy_tree(&GParse);\
    for (k=0; k<GNumfiles; k++) {\
        my_free(GTextfiles[k], 0);\
        GTextfiles[k] = NULL;\
    }\
    /* Don't free the GTextfiles buffer itself since it is allocated once in get_filename.c */\
    return (val);\
}
/*
* Initialize
*/
strcpy(&GProgname[0], argv[0]);
if (argc <= 1) return(usage());
strcpy(TEMP_DIR, "/tmp");
D_length = 0;
D = 0;
pattern_index = 0;
first_search = 1;
outname = (char **)my_malloc(sizeof(char *));
outname[0] = (char *)my_malloc(MAX_LINE_LEN);
NOBYTELEVEL = 0;
OPTIMIZEBYTELEVEL = 0;
GLIMITOUTPUT = 0;
GLIMITTOTALFILE = 0;
GBESTMATCH = 0;
GRECURSIVE = 0;
GNOPROMPT = 0;
GBYTECOUNT = 0;
GPRINTFILENUMBER = 0;
GOUTTAIL = 2; /* stupid fix, but works */
GFILENAMEONLY = 0;
GNOFILENAME = 0;
MATCHFILE = 0;
PRINTATTR = 0;
PRINTINDEXLINE = 0;
Pat_as_is=0;
Only_first = 0;
PRINTAPPXFILEMATCH = 0;
GCOUNT = 0;
HINTSFROMUSER = 0;
foundattr = 0;
ComplexBoolean = 0;
bestmatcherrors = 0;
patbufpos = -1;
RegionLimit=DEFAULT_REGION_LIMIT;
strcpy(GD_pattern, "\n");
GD_length = strlen(GD_pattern);
indexdir[0] = '\0';
memset(index_argv, '\0', sizeof(char *) * MAX_ARGS);
index_argc = 0;
memset(agrep_argv, '\0', sizeof(char *) * MAX_ARGS);
agrep_argc = 0;
FileOpt = NULL;
fileopt_length = 0;
memset(pat_list, '\0', sizeof(char *) * MAXNUM_PAT);
memset(pat_attr, '\0', sizeof(int) * MAXNUM_PAT);
for (i=0; i<MAX_ARGS; i++)
index_argv[i] = (char *)my_malloc(MaxNameLength + 2);
memset(is_mgrep_pat, '\0', sizeof(int) * MAXNUM_PAT);
memset(mgrep_pat_index, '\0', sizeof(int) *MAXNUM_PAT);
num_mgrep_pat = 0;
memset(pat_buf, '\0', (MAXNUM_PAT + 2)*MAXPAT);
pat_ptr = 0;
sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());
/* Set WHOLEFILESCOPE for per-request processing at server */
if (StructuredIndex) WHOLEFILESCOPE = 1;
else WHOLEFILESCOPE = 0;
if (argc > MAX_ARGS) {
#if ISSERVER
fprintf(stderr, "too many arguments %d obtained on server!\n", argc);
#endif /*ISSERVER*/
i = fileagrep(oldargc, oldargv, 0, stdout);
RETURN(i);
}
/*
* Process what options you can, then call fileagrep_init() to set
* options in agrep and get the pattern. Then, call fileagrep_search().
* Begin by copying options into agrep_argv assuming glimpse was not
* called as agrep (optimistic :-).
*/
agrep_argc = 0;
for (i=0; i<MAX_ARGS; i++) agrep_argv[i] = NULL;
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
strcpy(agrep_argv[agrep_argc], argv[0]); /* copy the name of the program anyway */
agrep_argc ++;
/* In glimpse, you should never output filenames with zero matches */
if (agrep_argc + 1 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'z';
agrep_argv[agrep_argc][2] = '\0';
agrep_argc ++;
/* In glimpse, you should always print pattern when using mgrep (user can't do -f or -m)! */
if (agrep_argc + 1 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'P';
agrep_argv[agrep_argc][2] = '\0';
my_P_index = agrep_argc;
agrep_argc ++;
/* In glimpse, you should always output multiple when doing mgrep */
if (agrep_argc + 1 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'M';
agrep_argv[agrep_argc][2] = '\0';
my_M_index = agrep_argc;
agrep_argc ++;
/* In glimpse, you should print the byte offset if there is a structured query */
if (agrep_argc + 1 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'b';
agrep_argv[agrep_argc][2] = '\0';
my_b_index = agrep_argc;
agrep_argc ++;
/* In glimpse, you should always have space for doing -m if required */
if (agrep_argc + 2 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'm';
agrep_argv[agrep_argc][2] = '\0';
agrep_argc ++;
agrep_argv[agrep_argc] = (char *)my_malloc(2); /* no op */
agrep_argv[agrep_argc][0] = '\0';
agrep_argc ++;
/* Add -A option to print filenames as default */
if (agrep_argc + 1 >= MAX_ARGS) {
fprintf(stderr, "%s: too many options!\n", GProgname);
RETURN(usage());
}
agrep_argv[agrep_argc] = (char *)my_malloc(sizeof(char *));
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = 'A';
agrep_argv[agrep_argc][2] = '\0';
my_A_index = agrep_argc;
agrep_argc ++;
while((agrep_argc < MAX_ARGS) && (--argc > 0) && (*++argv)[0] == '-' ) {
p = argv[0] + 1; /* ptr to first character after '-' */
c = *(argv[0]+1);
quitwhile = OFF;
while (!quitwhile && (*p != '\0')) {
c = *p;
switch(c) {
case 'F' :
MATCHFILE = ON;
FileOpt = (CHAR *)my_malloc(MAXFILEOPT);
if (*(p + 1) == '\0') {/* space after - option */
if(argc <= 1) {
fprintf(stderr, "%s: a file pattern must follow the -F option\n", GProgname);
RETURN(usage());
}
argv++;
if ((dummylen = strlen(argv[0])) > MAXFILEOPT) {
fprintf(stderr, "%s: -F option list too long\n", GProgname);
RETURN(usage());
}
strcpy(FileOpt, argv[0]);
argc--;
} else {
if ((dummylen = strlen(p+1)) > MAXFILEOPT) {
fprintf(stderr, "%s: -F option list too long\n", GProgname);
RETURN(usage());
}
strcpy(FileOpt, p+1);
} /* else */
quitwhile = ON;
break;
/*
* indexed search - use the exact pattern to search the index as well:
* not implemented yet
case 'X' :
Pat_as_is = ON;
break;
*/
/* search the index only and output the number of blocks */
case 'N' :
Only_first = ON;
break ;
/* also keep track of the matches in each file */
case 'Q' :
PRINTAPPXFILEMATCH = ON;
break ;
case 'U' :
HINTSFROMUSER = ON;
break;
/* go to home directory to find the index: even if server overwrites indexdir here, it won't overwrite INDEX_DIR until read_index() */
case 'H' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: a directory name must follow the -H option\n", GProgname);
RETURN(usage());
}
argv ++;
#if !ISSERVER
strcpy(indexdir, argv[0]);
#endif /*!ISSERVER*/
argc --;
}
#if !ISSERVER
else {
strcpy(indexdir, p+1);
}
agrep_argv[agrep_argc] = (char *)my_malloc(4);
strcpy(agrep_argv[agrep_argc], "-H");
agrep_argc ++;
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(indexdir) + 2);
strcpy(agrep_argv[agrep_argc], indexdir);
agrep_argc ++;
#endif /*!ISSERVER*/
quitwhile = ON;
break;
#if ISSERVER && SFS_COMPAT
/* INDEX_DIR will already be set since this is the server, so we can directly transfer the .glimpse_* files */
case '.' :
strcpy(TEMP_FILE, INDEX_DIR);
strcpy(temp_file, ".");
strcat(TEMP_FILE, "/.");
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: a file name must follow the -. option\n", GProgname);
RETURN(usage());
}
argv ++;
strcat(TEMP_FILE, argv[0]);
strcat(temp_file, argv[0]);
argc --;
}
else {
strcat(TEMP_FILE, p+1);
strcat(temp_file, p+1);
}
if (!strcmp(temp_file, INDEX_FILE) || !strcmp(temp_file, FILTER_FILE) ||
!strcmp(temp_file, ATTRIBUTE_FILE) || !strcmp(temp_file, MINI_FILE) ||
!strcmp(temp_file, P_TABLE) || !strcmp(temp_file, PROHIBIT_LIST) ||
!strcmp(temp_file, INCLUDE_LIST) || !strcmp(temp_file, NAME_LIST) ||
!strcmp(temp_file, NAME_LIST_INDEX) || !strcmp(temp_file, NAME_HASH) ||
!strcmp(temp_file, NAME_HASH_INDEX) || !strcmp(temp_file, DEF_STAT_FILE) ||
!strcmp(temp_file, DEF_MESSAGE_FILE)) {
if ((ret = open(TEMP_FILE, O_RDONLY, 0)) <= 0) RETURN(ret);
while ((num_read = read(ret, matched_region, MAX_REGION_LIMIT*2)) > 0) {
write(1 /* NOT TO newsockfd since that was got by a syscall!!! */, matched_region, num_read);
}
close(ret);
}
quitwhile = ON;
RETURN(0);
#endif /* ISSERVER */
/* go to temp directory to create temp files */
case 'T' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: a directory name must follow the -T option\n", GProgname);
RETURN(usage());
}
argv ++;
strcpy(TEMP_DIR, argv[0]);
argc --;
}
else {
strcpy(TEMP_DIR, p+1);
}
sprintf(tempfile, "%s/.glimpse_tmp.%d", TEMP_DIR, getpid());
quitwhile = ON;
break;
case 'R' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: the record size must follow the -R option\n", GProgname);
RETURN(usage());
}
argv ++;
RegionLimit = atoi(argv[0]);
argc --;
}
else {
RegionLimit = atoi(p+1);
}
if ((RegionLimit <= 0) || (RegionLimit > MAX_REGION_LIMIT)) {
fprintf(stderr, "Bad record size %d: must be in [%d, %d]: using default %d\n",
RegionLimit, 1, MAX_REGION_LIMIT, DEFAULT_REGION_LIMIT);
RegionLimit = DEFAULT_REGION_LIMIT;
}
quitwhile = ON;
break;
/* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */
case 'J' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: the server host name must follow the -J option\n", GProgname);
RETURNMAIN(usageS());
}
argv ++;
#if !ISSERVER
strcpy(SERV_HOST, argv[0]);
#endif /*!ISSERVER*/
argc --;
}
#if !ISSERVER
else {
strcpy(SERV_HOST, p+1);
}
#endif /*!ISSERVER*/
quitwhile = ON;
break;
/* doesn't matter if we overwrite the value in the client since the same value would have been picked up in main() anyway */
case 'K' :
if (*(p + 1) == '\0') {/* space after - option */
if (argc <= 1) {
fprintf(stderr, "%s: the server port must follow the -C option\n", GProgname);
RETURN(usage());
}
argv ++;
#if !ISSERVER
SERV_PORT = atoi(argv[0]);
#endif /*!ISSERVER*/
argc --;
}
#if !ISSERVER
else {
SERV_PORT = atoi(p+1);
}
if ((SERV_PORT < MIN_SERV_PORT) || (SERV_PORT > MAX_SERV_PORT)) {
fprintf(stderr, "Bad server port %d: must be in [%d, %d]: using default %d\n",
SERV_PORT, MIN_SERV_PORT, MAX_SERV_PORT, DEF_SERV_PORT);
SERV_PORT = DEF_SERV_PORT;
}
#endif /*!ISSERVER*/
quitwhile = ON;
break;
case 'C' :
CONTACT_SERVER = 1;
break;
case 'a' :
PRINTATTR = 1;
break;
case 'E':
PRINTINDEXLINE = 1;
break;
case 'W':
wholefilescope = 1;
break;
case 'z' :
UseFilters = 1;
break;
case 'r' :
GRECURSIVE = 1;
break;
case 'V' :
printf("\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
RETURN(0);
/* Must let 'f' and 'm' fall thru to default once multipatterns are done in agrep */
case 'f' :
case 'p' :
case 'm' :
case 'v' :
fprintf(stderr, "%s: illegal option: '-%c'\n", GProgname, c);
RETURN(usage());
case 'I' :
case 'D' :
case 'S' :
/* There is no space after these options */
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
agrep_argv[agrep_argc][0] = '-';
strcpy(agrep_argv[agrep_argc] + 1, p);
agrep_argc ++;
quitwhile = ON;
break;
case 'l':
GFILENAMEONLY = 1;
my_l_index = agrep_argc;
agrep_argv[agrep_argc] = (char *)my_malloc(4);
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = c;
agrep_argv[agrep_argc][2] = '\0';
agrep_argc ++;
break;
/*
* Copy the set of options for agrep: put them in separate argvs
* even if they are together after one '-' (easier to process).
* These are agrep options which glimpse has to peek into.
*/
default:
agrep_argv[agrep_argc] = (char *)my_malloc(16);
agrep_argv[agrep_argc][0] = '-';
agrep_argv[agrep_argc][1] = c;
agrep_argv[agrep_argc][2] = '\0';
agrep_argc ++;
if (c == 'n') {
nobytelevelmustbeon=1;
}
else if (c == 'b') GBYTECOUNT = 1;
else if (c == 'g') GPRINTFILENUMBER = 1;
else if (c == 't') GOUTTAIL = 1;
else if (c == 'y') GNOPROMPT = 1;
else if (c == 'h') GNOFILENAME = 1;
else if (c == 'c') GCOUNT = 1;
else if (c == 'B') {
GBESTMATCH = 1;
my_B_index = agrep_argc - 1;
}
/* the following options are followed by a parameter */
else if ((c == 'e') || (c == 'd') || (c == 'L') || (c == 'k')) {
if (*(p + 1) == '\0') {/* space after - option */
if(argc <= 1) {
fprintf(stderr, "%s: the '-%c' option must have an argument\n", GProgname, c);
RETURN(usage());
}
argv++;
if ( (c == 'd') && ((D_length = strlen(argv[0])) > MAX_NAME_SIZE) ) {
fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);
RETURN(usage());
/* Should this be RegionLimit if ByteLevelIndex? */
}
else if (c == 'L') {
GLIMITOUTPUT = GLIMITTOTALFILE = GLIMITPERFILE = 0;
sscanf(argv[0], "%d:%d:%d", &GLIMITOUTPUT, &GLIMITTOTALFILE, &GLIMITPERFILE);
if ((GLIMITOUTPUT < 0) || (GLIMITTOTALFILE < 0) || (GLIMITPERFILE < 0)) {
fprintf(stderr, "%s: invalid output limit %s\n", GProgname, argv[0]);
RETURN(usage());
}
}
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
strcpy(agrep_argv[agrep_argc], argv[0]);
if (c == 'd') {
preprocess_delimiter(argv[0], D_length, GD_pattern, &GD_length);
if (GOUTTAIL == 2) GOUTTAIL = 0;
/* Should this be RegionLimit if ByteLevelIndex? */
}
argc--;
} else {
if ( (c == 'd') && ((D_length = strlen(p+1)) > MAX_NAME_SIZE) ) {
fprintf(stderr, "%s: delimiter pattern too long (has > %d chars)\n", GProgname, MAX_NAME_SIZE);
RETURN(usage());
/* Should this be RegionLimit if ByteLevelIndex? */
}
else if (c == 'L') {
GLIMITOUTPUT = GLIMITTOTALFILE = GLIMITPERFILE = 0;
sscanf(p+1, "%d:%d:%d", &GLIMITOUTPUT, &GLIMITTOTALFILE, &GLIMITPERFILE);
if ((GLIMITOUTPUT < 0) || (GLIMITTOTALFILE < 0) || (GLIMITPERFILE < 0)) {
fprintf(stderr, "%s: invalid output limit %s\n", GProgname, p+1);
RETURN(usage());
}
}
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(p+1) + 2);
strcpy(agrep_argv[agrep_argc], p+1);
if (c == 'd') {
preprocess_delimiter(p+1, D_length-2, GD_pattern, &GD_length);
if (GOUTTAIL == 2) GOUTTAIL = 0;
/* Should this be RegionLimit if ByteLevelIndex? */
}
}
agrep_argc ++;
#if DEBUG
fprintf(stderr, "%d = %s\n", agrep_argc, agrep_argv[agrep_argc - 1]);
#endif /*DEBUG*/
quitwhile = ON;
if ((c == 'e') || (c == 'k')) foundpat = 1;
}
/* else it is something that glimpse doesn't know and agrep needs to look at */
break; /* from default: */
} /* switch(c) */
p ++;
}
} /* while (--argc > 0 && (*++argv)[0] == '-') */
/* exitloop: */
if ((GBESTMATCH == ON) && (MATCHFILE == ON) && (Only_first == ON))
fprintf(stderr, "%s: Warning: the number of matches may be incorrect when -B is used with -F.\n", HARVEST_PREFIX);
if (GOUTTAIL) GOUTTAIL = 1;
if (GNOFILENAME) {
agrep_argv[my_A_index][1] = 'Z'; /* ignore the -A option */
}
#if ISSERVER && SFS_COMPAT
if (RemoteFiles) { /* force -NQ so that won't start looking for files! */
Only_first = ON;
PRINTAPPXFILEMATCH = ON;
}
#endif
if (argc > 0) {
/* copy the remaining arguments (the pattern and the filenames, if any) verbatim */
for (i=0; i<argc; i++) {
if (agrep_argc >= MAX_ARGS) break;
agrep_argv[agrep_argc] = (char *)my_malloc(strlen(argv[0]) + 2);
strcpy(agrep_argv[agrep_argc], argv[0]);
agrep_argc ++;
argv ++;
}
if (!foundpat) argc --;
}
#if 0
for (j=0; j<agrep_argc; j++) printf("agrep_argv[%d] = %s\n", j, agrep_argv[j]);
printf("argc = %d\n", argc);
#endif /*0*/
/*
* Now perform the search by first looking at the index
* and obtaining the files to search; and then search
* them and output the result. If argc > 0, glimpse
* runs as agrep: otherwise, it searches index, etc.
*/
if (argc <= 0) {
glimpse_call = 1;
/* Initialize some data structures, read the index */
if (GRECURSIVE == 1) {
fprintf(stderr, "illegal option: '-r'\n");
RETURN(usage());
}
num_terminals = 0;
GParse = NULL;
memset(terminals, '\0', sizeof(ParseTree) * MAXNUM_PAT);
#if !ISSERVER
if (-1 == read_index(indexdir)) RETURN(-1);
#endif /*!ISSERVER*/
/*
This handles the -n option with ByteLevelIndex: disabled as of now, else should go into file search...
if (nobytelevelmustbeon && ByteLevelIndex) {
fprintf(stderr, "Warning: -n option used with byte-level index: must SEARCH the files\n");
NOBYTELEVEL=ON;
}
*/
WHOLEFILESCOPE = (WHOLEFILESCOPE || wholefilescope);
if (ByteLevelIndex) {
/* Must zero them here in addition to index search so that RETURN macro runs correctly */
if ((src_offset_table == NULL) &&
((src_offset_table = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL)) exit(2);
memset(src_offset_table, '\0', sizeof(struct offsets *) * OneFilePerBlock);
for (i=0; i<MAXNUM_PAT; i++) {
if ((multi_dest_offset_table[i] == NULL) &&
((multi_dest_offset_table[i] = (struct offsets **)my_malloc(sizeof(struct offsets *) * OneFilePerBlock)) == NULL)) exit(2);
memset(multi_dest_offset_table[i], '\0', sizeof(struct offsets *) * OneFilePerBlock);
}
}
read_filters(INDEX_DIR, UseFilters);
if (glimpse_clientdied) RETURN(0);
/* Now initialize agrep, set the options and get the actual pattern into GPattern */
if ((GM = fileagrep_init(agrep_argc, agrep_argv, MAXPAT, GPattern)) <= 0) {
/* this printf need not be there: agrep prints messages if error */
fprintf(stderr, "%s: error in options or arguments to `agrep'\n", HARVEST_PREFIX);
RETURN(usage());
}
patindex = pattern_index;
for (j=0; j<GM; j++) {
if (GPattern[j] == '\\') j++;
else if (test_indexable_char[GPattern[j]]) break;
}
if (j >= GM) {
fprintf(stderr, "%s: pattern '%s' has no indexable characters: glimpse cannot search for it\n", HARVEST_PREFIX, GPattern);
RETURN(-1);
}
/* Split GPattern into individual boolean terms */
if (split_pattern(GPattern, GM, APattern, terminals, &num_terminals, &GParse, StructuredIndex) <= 0) RETURN(-1);
#if BG_DEBUG
fprintf(debug, "GPattern = %s, APattern = %s, num_terminals = %d\n", GPattern, APattern, num_terminals);
#endif /*BG_DEBUG*/
if (foundattr) WHOLEFILESCOPE = 1; /* makes no sense to search attribute=value expressions without WHOLEFILESCOPE */
else if (!ComplexBoolean && !PRINTATTR && !((long)GParse & AND_EXP)) WHOLEFILESCOPE = 0; /* ORs can be done without WHOLEFILESCOPE */
if (WHOLEFILESCOPE <= 0) agrep_argv[my_b_index][1] = 'Z';
/*
if (!ComplexBoolean && ((long)GParse & AND_EXP) && (my_l_index != -1)) agrep_argv[my_l_index][1] = 'Z';
*/
if ((ComplexBoolean || ((long)GParse & AND_EXP)) && (my_l_index != -1)) agrep_argv[my_l_index][1] = 'Z';
/* Now re-initialize agrep_argv with APattern instead of GPattern */
my_free(agrep_argv[patindex], 0);
AM=strlen(APattern);
agrep_argv[patindex] = (char *)my_malloc(AM + 2);
strcpy(agrep_argv[patindex], APattern);
if (HINTSFROMUSER) {
int num=0, x, y, i, j;
char temp[MAX_NAME_SIZE+2];
struct offsets *o, *tailo, *heado;
while(1) {
if ((num = readline(newsockfd, dest_index_buf, REAL_INDEX_BUF)) <= 0) {
fprintf(stderr, "Input format error with -U option\n");
RETURN(-1);
}
dest_index_buf[num+1] = '\n';
if (!strncmp(dest_index_buf, "BEGIN", strlen("BEGIN"))) break;
}
sscanf(&dest_index_buf[strlen("BEGIN")], "%d%d%d", &bestmatcherrors, &NOBYTELEVEL, &OPTIMIZEBYTELEVEL);
/* printf("BEGIN %d %d %d\n", bestmatcherrors, NOBYTELEVEL, OPTIMIZEBYTELEVEL); */
num = readline(newsockfd, dest_index_buf, REAL_INDEX_BUF);
while (num > 0) {
dest_index_buf[num+1] = '\n';
if (!strncmp(dest_index_buf, "END", strlen("END"))) break;
i = j = 0;
while ((j<MAX_NAME_SIZE) && (dest_index_buf[i] != ' ') && (dest_index_buf[i] != '[') && (dest_index_buf[i] != '\n'))
temp[j++] = dest_index_buf[i++];
temp[j] = '\0';
x = atoi(temp);
GFileIndex[GNumfiles] = x;
if (x == file_num - 1) {
bigbuffer[bigbuffer_size] = '\0';
GTextfiles[GNumfiles++] = (CHAR *)strdup(GTextfilenames[x]);
bigbuffer[bigbuffer_size] = '\n';
}
else {
*(GTextfilenames[x+1] - 1) = '\0';
GTextfiles[GNumfiles++] = (CHAR *)strdup(GTextfilenames[x]);
*(GTextfilenames[x+1] - 1) = '\n';
}
/* printf("%d %s [", x, GTextfiles[GNumfiles-1]); */
src_index_set[block2index(x)] |= block2mask(x);
if (ByteLevelIndex && !NOBYTELEVEL) {
heado = tailo = NULL;
onemorey:
j = 0;
while ((j<MAX_NAME_SIZE) && ((dest_index_buf[i] == ' ') || (dest_index_buf[i] == '['))) i++;
while ((j<MAX_NAME_SIZE) && (dest_index_buf[i] != ' ') && (dest_index_buf[i] != '\n') && (dest_index_buf[i] != ']'))
temp[j++] = dest_index_buf[i++];
temp[j] = '\0';
y = atoi(temp);
/* printf(" %d", y); */
o = (struct offsets *)my_malloc(sizeof(struct offsets));
o->offset = y;
o->next = NULL;
o->sign = o->done = 0;
if (heado == NULL) {
heado = o;
tailo = o;
}
else {
tailo->next = o;
tailo = o;
}
if (dest_index_buf[i] == ' ') goto onemorey;
src_offset_table[x] = heado;
}
/* printf("]\n"); */
num = readline(newsockfd, dest_index_buf, REAL_INDEX_BUF);
}
goto search_files;
}
/*
* Copy the agrep-options that are relevant to index search into
* index_argv (see man-pages for which options are relevant).
* Also, adjust patindex whenever options are skipped over.
* NOTE: agrep_argv does NOT contain two options after one '-'.
*/
index_argc = 0;
for (j=0; j<agrep_argc; j++) {
if (agrep_argv[j][0] == '-') {
if ((agrep_argv[j][1] == 'c') || (agrep_argv[j][1] == 'h') || (agrep_argv[j][1] == 'l') || (agrep_argv[j][1] == 'n') ||
(agrep_argv[j][1] == 's') || (agrep_argv[j][1] == 't') || (agrep_argv[j][1] == 'G') || (agrep_argv[j][1] == 'O') ||
(agrep_argv[j][1] == 'b') || (agrep_argv[j][1] == 'i') || (agrep_argv[j][1] == 'u') || (agrep_argv[j][1] == 'g') ||
(agrep_argv[j][1] == 'E') || (agrep_argv[j][1] == 'Z')) {
patindex --;
continue;
}
if ((agrep_argv[j][1] == 'd') || (agrep_argv[j][1] == 'L')) { /* skip over the argument too */
j++;
patindex -= 2;
continue;
}
if ((agrep_argv[j][1] == 'e') || (agrep_argv[j][1] == 'm')) {
strcpy(index_argv[index_argc], agrep_argv[j]);
index_argc ++; j++;
strcpy(index_argv[index_argc], agrep_argv[j]);
if (agrep_argv[j-1][1] == 'm') patbufpos = index_argc; /* where to put the patbuf if fast-boolean by mgrep() */
index_argc ++;
}
else { /* No arguments: just copy THAT option: maybe, change some options */
strcpy(index_argv[index_argc], agrep_argv[j]);
if (agrep_argv[j][1] == 'A') index_argv[index_argc][1] = 'h';
else if (agrep_argv[j][1] == 'x') index_argv[index_argc][1] = 'w';
index_argc++;
}
}
else { /* This is either the pattern itself or a filename */
strcpy(index_argv[index_argc], agrep_argv[j]);
index_argc++;
}
}
sprintf(index_argv[index_argc], "%s", INDEX_FILE);
index_argc ++;
#if 0
for (j=0; j<index_argc; j++) printf("index_argv[%d] = %s\n", j, index_argv[j]);
printf("patindex = %d\n", patindex);
#endif /*0*/
/* Search the index and process index-search-only options; Worry about file-pattern */
ret = search_index(GParse);
if (ret <= 0) RETURN(-1);
num_blocks=0;
if (OneFilePerBlock) {
for(iii=0; iii<round(OneFilePerBlock, 8*sizeof(int)); iii++) {
if (src_index_set[iii] == 0) continue;
for (jjj=0; jjj < 8*sizeof(int); jjj++)
if (src_index_set[iii] & mask_int[jjj])
num_blocks ++;
}
if (num_blocks > OneFilePerBlock) num_blocks = OneFilePerBlock; /* roundoff */
}
else {
for (iii=0; iii<MAX_PARTITION; iii++)
if (src_index_set[iii]) num_blocks++;
}
if (num_blocks <= 0) RETURN (0);
if ((src_index_set[REAL_PARTITION - 1] == 1) && !Only_first && !OPTIMIZEBYTELEVEL) {
fprintf(stderr, "Warning: pattern has words present in the stop-list: must SEARCH the files\n");
}
/* if just the NOBYTELEVEL flag is set, then it is an optimization which glimpse does and user need not be warned */
#if DEBUG
fprintf(stderr, "--> search=%d optimize=%d times=%d all=%d blocks=%d len=%d pat=%s scope=%d\n",
NOBYTELEVEL, OPTIMIZEBYTELEVEL, src_index_set[REAL_PARTITION - 2], src_index_set[REAL_PARTITION - 1], num_blocks, strlen(APattern), APattern, WHOLEFILESCOPE);
#endif /*DEBUG*/
dummypat[0] = '\0';
if (!MATCHFILE) { /* the argc,argv don't matter */
get_filenames(src_index_set, 0, NULL, dummylen, dummypat, file_num);
if (Only_first) { /* search the index only */
fprintf(stderr, "There are matches to %d out of %d %s\n", num_blocks, (OneFilePerBlock > 0) ? OneFilePerBlock : GNumpartitions, (OneFilePerBlock > 0) ? "files" : "blocks");
if (num_blocks > 0) {
char cc[8];
cc[0] = 'y';
#if !ISSERVER
if (!GNOPROMPT) {
fprintf(stderr, "Do you want to see the file names? (y/n)");
fgets(cc, 4, stdin);
}
#endif /*!ISSERVER*/
if (cc[0] == 'y') {
if (PRINTAPPXFILEMATCH && Only_first && GPRINTFILENUMBER) {
printf("BEGIN %d %d %d\n", bestmatcherrors, NOBYTELEVEL, OPTIMIZEBYTELEVEL);
}
for (jjj=0; jjj<GNumfiles; jjj++) {
if (GPRINTFILENUMBER) printf("%d", GFileIndex[jjj]);
else printf("%s", GTextfiles[jjj]);
if (PRINTAPPXFILEMATCH) {
if (GCOUNT) {
int n = 0;
printf(": ");
if (ByteLevelIndex && (src_offset_table != NULL)) {
struct offsets *p1 = src_offset_table[GFileIndex[jjj]];
while (p1 != NULL) {
n ++;
p1 = p1->next;
}
}
else n = 1; /* there is atleast 1 match */
printf("%d", n);
}
else {
printf(" [");
if (ByteLevelIndex && (src_offset_table != NULL)) {
struct offsets *p1 = src_offset_table[GFileIndex[jjj]];
while (p1 != NULL) {
printf(" %d", p1->offset);
p1 = p1->next;
}
}
printf("]");
}
}
printf("\n");
}
if (PRINTAPPXFILEMATCH && Only_first && GPRINTFILENUMBER) {
printf("END\n");
}
}
}
RETURN(0);
}
if (!OneFilePerBlock) searchpercent = num_blocks*100/GNumpartitions;
else searchpercent = num_blocks * 100 / OneFilePerBlock;
#if BG_DEBUG
fprintf(debug, "searchpercent = %d, num_blocks = %d\n", searchpercent, num_blocks);
#endif /*BG_DEBUG*/
#if !ISSERVER
if (!GNOPROMPT && (searchpercent > MAX_SEARCH_PERCENT)) {
char cc[8];
cc[0] = 'y';
fprintf(stderr, "Your query may search about %d%% of the total space! Continue? (y/n)", searchpercent);
fgets(cc, 4, stdin);
if (cc[0] != 'y') RETURN(0);
}
if (ByteLevelIndex && (searchpercent > DEF_MAX_INDEX_PERCENT)) NOBYTELEVEL = 1;
#endif /*!ISSERVER*/
}
else { /* set up the right options for -F in index_argv/index_argc itself since they will no longer be used */
index_argc=0;
strcpy(index_argv[0], GProgname);
/* adding the -h option, which is safer for -F */
index_argc ++;
index_argv[index_argc][0] = '-';
index_argv[index_argc][1] = 'h';
index_argv[index_argc][2] = '\0';
index_argc ++;
/* new code: bgopal, Feb/8/94: deleted udi's code here */
j = 0;
while (FileOpt[j] == '-') {
j++;
while ((FileOpt[j] != ' ') && (FileOpt[j] != '\0') && (FileOpt[j] != '\n')) {
if (j >= MAX_ARGS - 1) {
fprintf(stderr, "%s: too many options after -F: %s\n", GProgname, FileOpt);
RETURN(usage());
}
index_argv[index_argc][0] = '-';
index_argv[index_argc][1] = FileOpt[j];
index_argv[index_argc][2] = '\0';
index_argc ++;
j++;
}
if ((FileOpt[j] == '\0') || (FileOpt[j] == '\n')) break;
if ((FileOpt[j] == ' ') && (FileOpt[j-1] == '-')) {
fprintf(stderr, "%s: illegal option: '-' after -F\n", GProgname);
RETURN(usage());
}
else if (FileOpt[j] == ' ') while(FileOpt[j] == ' ') j++;
}
while(FileOpt[j] == ' ') j++;
fileopt_length = strlen(FileOpt);
strncpy(index_argv[index_argc],FileOpt+j,fileopt_length-j);
index_argv[index_argc][fileopt_length-j] = '\0';
index_argc++;
my_free(FileOpt, MAXFILEOPT);
FileOpt = NULL;
#if BG_DEBUG
fprintf(debug, "pattern to check with -F = %s\n",index_argv[index_argc-1]);
#endif /*BG_DEBUG*/
#if DEBUG
fprintf(stderr, "-F : ");
for (jj=0; jj < index_argc; jj++)
fprintf(stderr, " %s ",index_argv[jj]);
fprintf(stderr, "\n");
#endif /*DEBUG*/
fflush(stdout);
get_filenames(src_index_set, index_argc, index_argv, dummylen, dummypat, file_num);
/* Assume the number of files per partition is approximately constant */
if (OneFilePerBlock) num_blocks = GNumfiles;
else num_blocks = GNumfiles * GNumpartitions / p_table[GNumpartitions - 1];
if (Only_first) { /* search the index only */
fprintf(stderr, "There are matches to %d out of %d %s\n", num_blocks, (OneFilePerBlock > 0) ? OneFilePerBlock : GNumpartitions, (OneFilePerBlock > 0) ? "files" : "blocks");
if (num_blocks > 0) {
char cc[8];
cc[0] = 'y';
#if !ISSERVER
if (!GNOPROMPT) {
fprintf(stderr, "Do you want to see the file names? (y/n)");
fgets(cc, 4, stdin);
}
#endif /*!ISSERVER*/
if (cc[0] == 'y') {
if (PRINTAPPXFILEMATCH && Only_first && GPRINTFILENUMBER) {
printf("BEGIN %d %d %d\n", bestmatcherrors, NOBYTELEVEL, OPTIMIZEBYTELEVEL);
}
for (jjj=0; jjj<GNumfiles; jjj++) {
if (GPRINTFILENUMBER) printf("%d", GFileIndex[jjj]);
else printf("%s", GTextfiles[jjj]);
if (PRINTAPPXFILEMATCH) {
if (GCOUNT) {
int n = 0;
printf(": ");
if (ByteLevelIndex && (src_offset_table != NULL)) {
struct offsets *p1 = src_offset_table[GFileIndex[jjj]];
while (p1 != NULL) {
n ++;
p1 = p1->next;
}
}
else n = 1; /* there is atleast 1 match */
printf("%d", n);
}
else {
printf("[");
if (ByteLevelIndex && (src_offset_table != NULL)) {
struct offsets *p1 = src_offset_table[GFileIndex[jjj]];
while (p1 != NULL) {
printf(" %d", p1->offset);
p1 = p1->next;
}
}
printf("]");
}
}
printf("\n");
}
if (PRINTAPPXFILEMATCH && Only_first && GPRINTFILENUMBER) {
printf("END\n");
}
}
}
RETURN(0);
}
if (OneFilePerBlock) searchpercent = GNumfiles * 100 / OneFilePerBlock;
else searchpercent = GNumfiles * 100 / p_table[GNumpartitions - 1];
#if BG_DEBUG
fprintf(debug, "searchpercent = %d, num_files = %d\n", searchpercent, p_table[GNumpartitions - 1]);
#endif /*BG_DEBUG*/
#if !ISSERVER
if (!GNOPROMPT && (searchpercent > MAX_SEARCH_PERCENT)) {
char cc[8];
cc[0] = 'y';
fprintf(stderr, "Your query may search about %d%% of the total space! Continue? (y/n)", searchpercent);
fgets(cc, 4, stdin);
if (cc[0] != 'y') RETURN(0);
}
if (ByteLevelIndex && (searchpercent > DEF_MAX_INDEX_PERCENT)) NOBYTELEVEL = 1;
#endif /*!ISSERVER*/
}
search_files:
/* Replace -B by the number of errors if best-match */
if (GBESTMATCH && (my_B_index >= 0)) {
sprintf(&agrep_argv[my_B_index][1], "%d", bestmatcherrors);
#if BG_DEBUG
fprintf(debug, "Changing -B to -%d\n", bestmatcherrors);
#endif /*BG_DEBUG*/
}
agrep_argv[my_M_index][1] = 'Z';
agrep_argv[my_P_index][1] = 'Z';
/*
if (!ComplexBoolean && ((long)GParse & AND_EXP) && (my_l_index != -1) && !WHOLEFILESCOPE) agrep_argv[my_l_index][1] = 'l';
*/
if ((ComplexBoolean || ((long)GParse & AND_EXP)) && (my_l_index != -1) && !WHOLEFILESCOPE) agrep_argv[my_l_index][1] = 'l';
if (GNumfiles <= 0) RETURN(0);
if (glimpse_clientdied) RETURN(0);
/* must reinitialize since the above agrep calls for index-search ruined the real options: it is required EVEN IF ByteLevelIndex */
AM = fileagrep_init(agrep_argc, agrep_argv, MAXPAT, APattern);
/* do actual search, with postfiltering if structured query */
if (WHOLEFILESCOPE <= 0) {
if (!UseFilters) {
if (!ByteLevelIndex || NOBYTELEVEL) {
for (i=0; i<GNumfiles; i++) {
gprev_num_of_matched = gnum_of_matched;
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
SetCurrentFileName = 0;
if (GLIMITOUTPUT > 0) {
if (GLIMITOUTPUT <= gnum_of_matched) break;
LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
}
if (GLIMITTOTALFILE > 0) {
if (GLIMITTOTALFILE <= gfiles_matched) break;
LIMITTOTALFILE = GLIMITTOTALFILE - gfiles_matched;
}
if ((ret < 0) && (errno == AGREP_ERROR)) break;
if (glimpse_clientdied) break;
fflush(stdout);
}
}
else {
for (i=0; i<GNumfiles; i++) {
gprev_num_of_matched = gnum_of_matched;
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if (stat(GTextfiles[i], &file_stat_buf) == -1) continue;
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
}
else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GTextfiles[i], GFileIndex[i], src_offset_table, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
SetCurrentFileName = 0;
if (GLIMITOUTPUT > 0) {
if (GLIMITOUTPUT <= gnum_of_matched) break;
LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
}
if (GLIMITTOTALFILE > 0) {
if (GLIMITTOTALFILE <= gfiles_matched) break;
LIMITTOTALFILE = GLIMITTOTALFILE - gfiles_matched;
}
if ((ret < 0) && (errno == AGREP_ERROR)) break;
if (glimpse_clientdied) break;
fflush(stdout);
}
}
}
else {
sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());
for (i=0; i<GNumfiles; i++) {
if (apply_filter(GTextfiles[i], outname[0]) == 1) {
gprev_num_of_matched = gnum_of_matched;
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if (stat(GTextfiles[i], &file_stat_buf) == -1) continue;
if (!ByteLevelIndex || NOBYTELEVEL || (file_stat_buf.st_mtime > index_stat_buf.st_mtime)) {
first_search = 1;
if ((ret = fileagrep_search(AM, APattern, 1, outname, 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
}
else {
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
if ((ret = fileagrep_search(AM, APattern, 1, outname, 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
}
else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], outname[0], GFileIndex[i], src_offset_table, stdout)) > 0) {
gfiles_matched ++;
gnum_of_matched += ret;
}
}
unlink(outname[0]);
SetCurrentFileName = 0;
}
else {
if (!ByteLevelIndex || NOBYTELEVEL) {
first_search = 1;
if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
}
else {
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if (stat(GTextfiles[i], &file_stat_buf) == -1) continue;
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
if ((ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
}
else if ((ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GTextfiles[i], GFileIndex[i], src_offset_table, stdout)) > 0) {
gnum_of_matched += ret;
gfiles_matched ++;
}
SetCurrentFileName = 0;
}
}
if (GLIMITOUTPUT > 0) {
if (GLIMITOUTPUT <= gnum_of_matched) break;
LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
}
if (GLIMITTOTALFILE > 0) {
if (GLIMITTOTALFILE <= gfiles_matched) break;
LIMITTOTALFILE = GLIMITTOTALFILE - gfiles_matched;
}
if ((ret < 0) && (errno == AGREP_ERROR)) break;
if (glimpse_clientdied) break;
fflush(stdout);
}
}
}
else {
FILE *tmpfp = NULL; /* to store structured query-search output */
int OLDLIMITOUTPUT; /* don't use LIMITs for search: only for filtering=identify_region(): agrep NEVER changes these 3 */
int OLDLIMITPERFILE;
int OLDLIMITTOTALFILE;
int OLDPRINTRECORD; /* don't use PRINTRECORD for search: only after filter_output() recognizes boolean in wholefilescope */
int OLDCOUNT; /* don't use OLDCOUNT for search: only after filter_output() recognizes boolean in wholefilescope */
if (!UseFilters) {
for (i=0; i<GNumfiles; i++) {
OLDLIMITOUTPUT = LIMITOUTPUT;
LIMITOUTPUT = 0;
OLDLIMITPERFILE = LIMITPERFILE;
LIMITPERFILE = 0;
OLDLIMITTOTALFILE = LIMITTOTALFILE;
LIMITTOTALFILE = 0;
OLDPRINTRECORD = PRINTRECORD;
PRINTRECORD = 1;
OLDCOUNT = COUNT;
COUNT = 0;
gprev_num_of_matched = gnum_of_matched;
if ((tmpfp = fopen(tempfile, "w")) == NULL) {
fprintf(stderr, "%s: cannot open for writing: %s, errno=%d\n", GProgname, tempfile, errno);
RETURN(usage());
}
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if (!ByteLevelIndex || NOBYTELEVEL) {
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
}
else {
if (stat(GTextfiles[i], &file_stat_buf) == -1) {
fclose(tmpfp);
continue;
}
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
}
else ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GTextfiles[i], GFileIndex[i], src_offset_table, tmpfp);
}
SetCurrentFileName = 0;
fflush(tmpfp);
fclose(tmpfp);
tmpfp = NULL;
if ((ret < 0) && (errno == AGREP_ERROR)) break;
#if DEBUG
printf("done search\n");
fflush(stdout);
#endif /*DEBUG*/
LIMITOUTPUT = OLDLIMITOUTPUT;
LIMITPERFILE = OLDLIMITPERFILE;
LIMITTOTALFILE = OLDLIMITTOTALFILE;
PRINTRECORD = OLDPRINTRECORD;
COUNT = OLDCOUNT;
ret = filter_output(GTextfiles[i], tempfile, GParse, GD_pattern, GD_length, GOUTTAIL, nullfp, StructuredIndex);
gnum_of_matched += (ret > 0) ? ret : 0;
gfiles_matched += (ret > 0) ? 1 : 0;
if (GLIMITOUTPUT > 0) {
if (GLIMITOUTPUT <= gnum_of_matched) break;
LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
}
if (GLIMITTOTALFILE > 0) {
if (GLIMITTOTALFILE <= gfiles_matched) break;
LIMITTOTALFILE = GLIMITTOTALFILE - gfiles_matched;
}
if (glimpse_clientdied) break;
fflush(stdout);
}
}
else { /* we should try to apply the filter (we come here with -W -z, say) */
sprintf(outname[0], "%s/.glimpse_apply.%d", TEMP_DIR, getpid());
for (i=0; i<GNumfiles; i++) {
OLDLIMITOUTPUT = LIMITOUTPUT;
LIMITOUTPUT = 0;
OLDLIMITPERFILE = LIMITPERFILE;
LIMITPERFILE = 0;
OLDLIMITTOTALFILE = LIMITTOTALFILE;
LIMITTOTALFILE = 0;
OLDPRINTRECORD = PRINTRECORD;
PRINTRECORD = 1;
OLDCOUNT = COUNT;
COUNT = 0;
gprev_num_of_matched = gnum_of_matched;
if ((tmpfp = fopen(tempfile, "w")) == NULL) {
fprintf(stderr, "%s: cannot open for writing: %s, errno=%d\n", GProgname, tempfile, errno);
RETURN(usage());
}
SetCurrentFileName = 1;
if (GPRINTFILENUMBER) sprintf(CurrentFileName, "%d", GFileIndex[i]);
else strcpy(CurrentFileName, GTextfiles[i]);
if (apply_filter(GTextfiles[i], outname[0]) == 1) {
if (stat(GTextfiles[i], &file_stat_buf) == -1) {
fclose(tmpfp);
continue;
}
if (!ByteLevelIndex || NOBYTELEVEL || (file_stat_buf.st_mtime > index_stat_buf.st_mtime)) {
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, outname, 0, tmpfp);
}
else {
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, outname, 0, tmpfp);
}
else ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], outname[0], GFileIndex[i], src_offset_table, tmpfp);
}
unlink(outname[0]);
}
else {
if (!ByteLevelIndex || NOBYTELEVEL) {
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
}
else {
if (stat(GTextfiles[i], &file_stat_buf) == -1) {
fclose(tmpfp);
continue;
}
if (file_stat_buf.st_mtime > index_stat_buf.st_mtime) {
/* fprintf(stderr, "Warning: file modified after indexing: must SEARCH %s\n", CurrentFileName); */
free_list(&src_offset_table[GFileIndex[i]]);
first_search = 1;
ret = fileagrep_search(AM, APattern, 1, >extfiles[i], 0, tmpfp);
}
else ret = glimpse_search(AM, APattern, GD_length, GD_pattern, GTextfiles[i], GTextfiles[i], GFileIndex[i], src_offset_table, tmpfp);
}
}
SetCurrentFileName = 0;
fflush(tmpfp);
fclose(tmpfp);
tmpfp = NULL;
if ((ret < 0) && (errno == AGREP_ERROR)) break;
#if DEBUG
printf("done search\n");
fflush(stdout);
#endif /*DEBUG*/
LIMITOUTPUT = OLDLIMITOUTPUT;
LIMITPERFILE = OLDLIMITPERFILE;
LIMITTOTALFILE = OLDLIMITTOTALFILE;
PRINTRECORD = OLDPRINTRECORD;
COUNT = OLDCOUNT;
ret = filter_output(GTextfiles[i], tempfile, GParse, GD_pattern, GD_length, GOUTTAIL, nullfp, StructuredIndex);
gnum_of_matched += (ret > 0) ? ret : 0;
gfiles_matched += (ret > 0) ? 1 : 0;
if (GLIMITOUTPUT > 0) {
if (GLIMITOUTPUT <= gnum_of_matched) break;
LIMITOUTPUT = GLIMITOUTPUT - gnum_of_matched;
}
if (GLIMITTOTALFILE > 0) {
if (GLIMITTOTALFILE <= gfiles_matched) break;
LIMITTOTALFILE = GLIMITTOTALFILE - gfiles_matched;
}
if (glimpse_clientdied) break;
fflush(stdout);
}
}
}
if (errno == AGREP_ERROR) {
fprintf(stderr, "%s: error in options or arguments to `agrep'\n", HARVEST_PREFIX);
}
RETURN(0);
}
else { /* argc > 0: simply call agrep */
#if DEBUG
for (i=0; i<agrep_argc; i++)
printf("agrep_argv[%d] = %s\n", i, agrep_argv[i]);
#endif /*DEBUG*/
i = fileagrep(oldargc, oldargv, 0, stdout);
RETURN(i);
}
}
/* end of process_query() */
/*
 * purge_filenames: weed out of filenames[0..num-1] every name that can no
 * longer be stat()ed.  These are the files which got DELETED after the
 * index was built (but a fresh index was NOT built).
 *
 * Each dropped name is released with my_free(); the surviving names are
 * shifted down so they stay contiguous and in their original order, and
 * the slot vacated at the end is set to NULL.
 *
 * Returns the number of names that remain.
 *
 * Redundant since agrep opens them anyway and stat is as bad as open.
 */
int
purge_filenames(filenames, num)
	CHAR	**filenames;
	int	num;
{
	struct stat	statbuf;
	int	remaining = num;	/* live entries left in filenames[] */
	int	cur = 0;		/* entry currently being examined */
	int	k;
	int	status;

	while (cur < remaining) {
		status = stat(filenames[cur], &statbuf);
		if (status != -1) {
			cur++;		/* still there: keep it and move on */
			continue;
		}
#if	BG_DEBUG
		fprintf(debug, "stat on %s = %d\n", filenames[cur], status);
#endif	/*BG_DEBUG*/
		my_free(filenames[cur], 0);
		/* close the gap; cur is NOT advanced since a new name now sits in that slot */
		for (k = cur + 1; k < remaining; k++)
			filenames[k-1] = filenames[k];
		filenames[remaining-1] = NULL;
		remaining--;
	}
#if	BG_DEBUG
	fprintf(debug, "Old numfiles=%d\tNew numfiles=%d\n", num, remaining);
	for (cur = 0; cur < remaining; cur++)
		fprintf(debug, "file %d = %s\n", cur, filenames[cur]);
#endif	/*BG_DEBUG*/
	return remaining;
}
/*
 * Scratch buffer into which filter_output() reads the stored search output,
 * at most BLOCKSIZE bytes at a time.  The extra MAXPAT*2 bytes are presumably
 * slack for the sentinel bytes filter_output() writes just outside a record
 * -- TODO confirm.
 */
CHAR filter_buf[BLOCKSIZE + MAXPAT*2];
/*
 * getbyteoff: parse a byte-offset prefix of the form "<digits>= " that
 * starts at buf.
 *
 *	buf	 -- text to examine
 *	pbyteoff -- receives the decimal value of the digits on success;
 *		    it is left untouched on failure
 *
 * Returns the #of bytes stripped off (the digits plus the "= " separator),
 * or -1 if the digits are not immediately followed by "= ".  If buf begins
 * directly with "= " (no digits at all) the offset is reported as 0 and 2
 * is returned.
 *
 * At most sizeof(temp)-1 digits are accepted so that the terminating '\0'
 * always fits inside temp[]; a longer digit run is rejected with -1 since
 * the character after the accepted digits is then not '='.
 */
int getbyteoff(buf, pbyteoff)
	CHAR	*buf;
	int	*pbyteoff;
{
	CHAR	temp[32];
	int	i = 0;

	/* keep one slot free for the terminator (the old bound of 32 let temp[32] be written) */
	while (isdigit(*buf) && (i < (int)sizeof(temp) - 1)) temp[i++] = *buf++;
	if ((*buf != '=') || (*(buf + 1) != ' ')) return -1;
	temp[i] = '\0';
	*pbyteoff = atoi((char *)temp);
	return i+2;
}
/*
 * Filter the search output stored in outfile (produced by searching infile):
 *
 *	-- get the matched line/record-s using GD_pattern, GD_length and GOUTTAIL
 *	-- call identify regions using matched line/record's byte offset
 *	   (the "%d= " prefix of the record, parsed by getbyteoff())
 *	-- for every terminal of the query whose attribute is 0 or equals the
 *	   attribute of that region, see if it matches that line/record using memagrep
 *	-- if so, output that line/record (onto stdout, or onto a temporary store
 *	   file when the query is boolean, in which case the stored output is
 *	   copied to stdout only if the whole boolean expression is satisfied)
 *
 * Parameters:
 *	infile	   -- name of the file that was searched; used for region_create(),
 *		      for -l style output and for file_out()
 *	outfile	   -- name of the file holding the search output to be filtered
 *	GParse	   -- handed to eval_tree() when ComplexBoolean is set; otherwise
 *		      its value is tested as a bit mask against AND_EXP
 *	GD_pattern, GD_length, GOUTTAIL
 *		   -- record delimiter description passed on to
 *		      forward_delimiter()/backward_delimiter()
 *	nullfp	   -- output stream handed to memagrep_search()
 *	num_attr   -- number of attributes; if <= 0 no region lookup is done and
 *		      every record is treated as attribute 0
 *
 * Returns:
 *	0 if outfile cannot be opened, region_create() fails or the store file
 *	cannot be created; otherwise
 *	-- with GFILENAMEONLY: 1 if at least one record was counted, else 0
 *	-- for a boolean query (ComplexBoolean or AND_EXP): the number of printed
 *	   records if the expression was satisfied, else 0
 *	-- in all other cases (must be -a): the number of printed records
 *
 * NOTE(review): GD_length is declared as an array below but is added to an
 * address as if it were a plain length -- confirm against the callers.
 */
int
filter_output(infile, outfile, GParse, GD_pattern, GD_length, GOUTTAIL, nullfp, num_attr)
	char	*infile;
	char	*outfile;
	ParseTree *GParse;
	CHAR	GD_pattern[];
	int	GD_length[];
	int	GOUTTAIL;
	FILE	*nullfp;
	int	num_attr;
{
	FILE	*outfp;
	FILE	*displayfp = NULL;
	FILE	*storefp = NULL;
	int	num_read;
	int	residue = 0;	/* bytes of an incomplete trailing record carried over to the next block */
	int	byteoff;
	int	attribute;
	int	i, ii;	/* i is forloop index, ii is booleaneval index */
	CHAR	*final_end;
	CHAR	*current_end;
	CHAR	*current_begin;
	CHAR	*previous_begin;
	int	skiplen;
	char	s[MAX_LINE_LEN];
	CHAR	c1, c2;
	int	printed, numprinted = 0;	/* returns number of printed records if successful in matching the pattern in the object infile */
	char	*attrname;
	int	success = 0;	/* do we print the stored output or not */

#if	BG_DEBUG
	printf("INFILE=%s\n", infile);
	printf("OUTFILE\n");
	sprintf(s, "exec cat %s\n", outfile);
	system(s);
#endif	/*BG_DEBUG*/
	if ((outfp = fopen(outfile, "r")) == NULL) return 0;
	if (StructuredIndex && (-1 == region_create(infile))) {
		fclose(outfp);
		return 0;
	}
	if (ComplexBoolean || ((long)GParse & AND_EXP)) {
		/* boolean query: hold the output back until we know the expression is satisfied */
		sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
		if ((displayfp = storefp = fopen(s, "w")) == NULL) {
			if (StructuredIndex) region_destroy();
			fclose(outfp);
			return 0;
		}
	}
	else {
		displayfp = stdout;
		/* cannot come to filter_output in this case unless -a! */
	}
	memset(matched_terminals, '\0', num_terminals);
	while ( ( (num_read = fread(filter_buf + residue, 1, BLOCKSIZE - residue, outfp)) > 0) || (residue > 0)) {
		if (num_read <= 0) {
			/* nothing more to read: whatever was carried over is the final piece */
			final_end = filter_buf + residue;
			num_read = residue;
			residue = 0;
		}
		else {
			num_read += residue;
			final_end = (CHAR *)backward_delimiter(filter_buf + num_read, filter_buf, GD_pattern, GD_length, GOUTTAIL);
			residue = filter_buf + num_read - final_end;
		}
#if	DEBUG
		fprintf(stderr, "filter_buf=%x final_end=%x residue=%x last_chars=%c%c%c num_read=%x\n",
			filter_buf, final_end, residue, *(final_end-2), *(final_end-1), *(final_end), num_read);
#endif	/*DEBUG*/
		current_begin = previous_begin = filter_buf;
		current_end = (CHAR *)forward_delimiter(filter_buf, filter_buf + num_read, GD_pattern, GD_length, GOUTTAIL);	/* skip over prefixes like filename */
		if (!GOUTTAIL) current_end = (CHAR *)forward_delimiter((long)current_end + GD_length, final_end, GD_pattern, GD_length, GOUTTAIL);
		while (current_end <= final_end) {
			previous_begin = current_begin;
			/* look for %d= */
			byteoff = -1;
			skiplen = -1;	/* reset per record: it used to be read uninitialized/stale when the record has no digits */
			while (current_begin < current_end) {
				if (isdigit(*current_begin)) {
					skiplen = getbyteoff(current_begin, &byteoff);
#if	BG_DEBUG
					fprintf(debug, "byteoff=%d skiplen=%d\n", byteoff, skiplen);
#endif	/*BG_DEBUG*/
					if ((skiplen < 0) || (byteoff < 0)) {
						current_begin ++;
						continue;
					}
					else break;
				}
				else current_begin ++;
			}
#if	DEBUG
			printf("current_begin=%x current_end=%x final_end=%x residue=%x num_read=%x\n", current_begin, current_end, final_end, residue, num_read);
#endif	/*DEBUG*/
#if	DEBUG
			printf("byteoff=%d skiplen=%d\n", byteoff, skiplen);
#endif	/*DEBUG*/
			if ((skiplen < 0) || (byteoff < 0)) {	/* output the whole line as it is: there is nothing to strip (e.g., -l) */
				fwrite(previous_begin, 1, current_end-previous_begin, displayfp);
				numprinted ++;
			}
			else if ( (num_attr <= 0) || (((attribute = region_identify(byteoff, 0)) < num_attr) && (attribute >= 0)) ) {
				/* prefix is from previous_begin to current_begin. Skip skiplen from current_begin. Rest until current_end is valid output */
				if (num_attr <= 0) attribute = 0;
#if	BG_DEBUG
				fprintf(debug, "region@%d=%d\n", byteoff, attribute);
#endif	/*BG_DEBUG*/
				/* remember the two bytes that are temporarily overwritten with '\n' around the record */
				c1 = *(current_begin + skiplen - 1);
				c2 = *(current_end + 1);
				printed = 0;
				for (i=0; i<num_terminals; i++) {
					/* a terminal that already matched need not be searched again if this record cannot be printed anyway */
					if (matched_terminals[i] && (GFILENAMEONLY || FILEOUT || printed || ((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) || ((LIMITPERFILE > 0) && (numprinted >= LIMITPERFILE)))) continue;
					if ((terminals[i].data.leaf.attribute == 0) || ((int)(terminals[i].data.leaf.attribute) == attribute)) {
						*(current_begin + skiplen - 1) = '\n';
						*(current_end + 1) = '\n';
						if (memagrep_search(	strlen(terminals[i].data.leaf.value), terminals[i].data.leaf.value,
									current_end - current_begin - skiplen + 1, current_begin + skiplen - 1,
									0, nullfp) > 0) {
#if	0
							*(current_end + 1) = '\0';
							printf("--> search succeeded for %s in %s\n", terminals[i].data.leaf.value, previous_begin);
#endif	/*0*/
							*(current_begin + skiplen - 1) = c1;
							*(current_end + 1) = c2;
							matched_terminals[i] = 1;	/* must reevaluate/set since don't know if it should be printed */
							if (!(((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) ||
							      ((LIMITPERFILE > 0) && (numprinted >= LIMITPERFILE))) && !printed) {	/* see if it was useful later */
								if (!COUNT && !FILEOUT) {
									fwrite(previous_begin, 1, current_begin - previous_begin, displayfp);
									if (PRINTATTR) fprintf(displayfp, "%s# ",
										(attrname = attr_id_to_name(attribute)) == NULL ? "(null)" : attrname);
									if (GBYTECOUNT) fprintf(displayfp, "%d= ", byteoff);
									if (PRINTRECORD) {
										fwrite(current_begin + skiplen, 1, current_end - current_begin - skiplen, displayfp);
									}
									else {
										if (*(current_begin + skiplen) == '@') {
											int	iii = 0;
											/* copy up to the closing '}', but never run past the end of this record */
											while ((current_begin + skiplen + iii < current_end) && (current_begin[skiplen + iii] != '}'))
												fputc(current_begin[skiplen + iii++], displayfp);
											fputc('}', displayfp);
										}
										fputc('\n', displayfp);
									}
								}
								printed = 1;
								numprinted ++;
							}
						}
						else {
#if	0
							*(current_end + 1) = '\0';
							printf("--> search failed for %s in %s\n", terminals[i].data.leaf.value, previous_begin);
#endif	/*0*/
							*(current_begin + skiplen - 1) = c1;
							*(current_end + 1) = c2;
						}
					}
				}
				if (!success) {
					if (ComplexBoolean) {
						success = eval_tree(GParse, matched_terminals);
					}
					else {
						if ((long)GParse & AND_EXP) {
							/* a pure AND succeeds only when every terminal has matched */
							success = 0;
							for (ii=0; ii<num_terminals; ii++) {
								if (!matched_terminals[ii]) break;
							}
							if (ii >= num_terminals) success = 1;
						}
						else {
							success = 0;
							/* cannot come to filter_output in this case unless -a! */
						}
					}
				}
				/* optimize options that do not need all the matched lines */
				if (success) {
					if (GFILENAMEONLY) {
						fprintf(stdout, "%s\n", infile);
						if (storefp != NULL) fclose(storefp);	/* don't bother to flush! */
						storefp = NULL;
						goto unlink_and_quit;
					}
					else if (FILEOUT) {
						file_out(infile);
						if (storefp != NULL) fclose(storefp);	/* don't bother to flush! */
						storefp = NULL;
						goto unlink_and_quit;
					}
				}
			}
			if (((LIMITOUTPUT > 0) && (numprinted >= LIMITOUTPUT)) || ((LIMITPERFILE > 0) && (numprinted >= LIMITPERFILE))) goto double_break;
			if (glimpse_clientdied) goto double_break;
			if (current_end >= final_end) break;
			current_begin = current_end;
			if (!GOUTTAIL) current_end = (CHAR *)forward_delimiter((long)current_end + GD_length, final_end, GD_pattern, GD_length, GOUTTAIL);
			else current_end = (CHAR *)forward_delimiter(current_end, final_end, GD_pattern, GD_length, GOUTTAIL);
		}
		/* slide the incomplete trailing record to the front; source and destination may overlap, hence memmove */
		if (residue > 0) memmove(filter_buf, final_end, residue);
	}

double_break:
	/* Come here on normal exit or when the current agrep-output is no longer of any use */
	if (!success) {
		if (ComplexBoolean) {
			success = eval_tree(GParse, matched_terminals);
		}
		else {
			if ((long)GParse & AND_EXP) {
				success = 0;
				for (ii=0; ii<num_terminals; ii++) {
					if (!matched_terminals[ii]) break;
				}
				if (ii >= num_terminals) success = 1;
			}
			else {
				success = 0;
				/* cannot come to filter_output in this case unless -a! */
			}
		}
	}

	/* Print the temporary output onto stdout if search was successful; unlink the temporary file */
	if (success) {
		if (GFILENAMEONLY) {	/* all other output options are useless since they all deal with the MATCHED line */
			fprintf(stdout, "%s\n", infile);
			if (storefp != NULL) fclose(storefp);	/* don't bother to flush! */
			storefp = NULL;
		}
		else if (COUNT && !FILEOUT) {
			if(!NOFILENAME) fprintf(stdout, "%s: %d\n", CurrentFileName, numprinted);
			else fprintf(stdout, "%d\n", numprinted);
			if (storefp != NULL) fclose(storefp);	/* don't bother to flush! */
			storefp = NULL;
		}
		else if (FILEOUT) {
			file_out(infile);
			if (storefp != NULL) fclose(storefp);	/* don't bother to flush! */
			storefp = NULL;
		}
		else if (storefp != NULL) {
			fflush(storefp);
			fclose(storefp);
#if	DEBUG
			printf("STOREOUTPUT\n");
			sprintf(s, "exec cat %s/.glimpse_storeoutput.%d\n", TEMP_DIR, getpid());
			system(s);
#endif	/*DEBUG*/
			/* reopen the store file and copy the held-back output to stdout */
			sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
			if ((storefp = fopen(s, "r")) != NULL) {
				while (fgets(s, MAX_LINE_LEN, storefp) != NULL) fputs(s, stdout);
				fclose(storefp);
			}
			storefp = NULL;
		}
	}
	else {
		if (storefp != NULL) fclose(storefp);	/* else don't bother to flush */
	}

unlink_and_quit:
	sprintf(s, "%s/.glimpse_storeoutput.%d", TEMP_DIR, getpid());
	unlink(s);
	if (StructuredIndex) region_destroy();
	fclose(outfp);
	if (GFILENAMEONLY) {
		if (numprinted > 0) return 1;
		else return 0;
	}
	else if (ComplexBoolean || ((long)GParse & AND_EXP)) {
		if (success) return numprinted;
		else return 0;
	} else {	/* must be -a */
		return numprinted;
	}
}
/*
 * usage: print the glimpse version banner, the command-line synopsis and a
 * summary of the frequently used options onto stderr.
 *
 * Always returns -1 (useful if we make glimpse into a library).
 *
 * Undocumented Options for SFS (like RPC calls)
 * print file number of match instead of file name: -g
 * print enclosing offsets of matched record: -q
 * NOT print matched record: -u
 * E.G. USAGE: -qbug (b prints offset of pattern: can also use -lg or -Ng)
 * look only at index: -E
 * look at matched offsets in files as seen in index (w/o searching): -QN
 * E.G. USAGE: -EQNgy
 * read the -EQNg or just -QNg output from stdin and perform actual search w/o
 * searching the index (take hints from user): -U
 * NOTE: can't use U unless QNg are all used together (e.g., BEGIN/END won't be printed)
 */
int
usage()
{
	fprintf(stderr, "\nThis is glimpse version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
	fprintf(stderr, "usage:  %s [-#abcdehiklnprstwxyBCDEGIMNQSVW] [-F pat] [-H dir] [-J host] [-K port] [-L num] [-R lim] [-T dir] pattern [files]", GProgname);
	fprintf(stderr, "\n");
	fprintf(stderr, "summary of frequently used options:\n");
	fprintf(stderr, "(For a more detailed listing see 'man glimpse'.)\n");
	fprintf(stderr, "-#: find matches with at most # errors\n");
	fprintf(stderr, "-c: output the number of matched records\n");
	fprintf(stderr, "-d: define record delimiter\n");
	fprintf(stderr, "-h: do not output file names\n");
	fprintf(stderr, "-i: case-insensitive search, e.g., 'a' = 'A'\n");
	fprintf(stderr, "-l: output the names of files that contain a match\n");
	fprintf(stderr, "-n: output record prefixed by record number\n");
	/* fprintf(stderr, "-v: output those records that have no matches\n"); */
	fprintf(stderr, "-w: pattern has to match as a word, e.g., 'win' will not match 'wind'\n");
	fprintf(stderr, "-B: best match mode. find the closest matches to the pattern\n");
	fprintf(stderr, "-F 'pat': 'pat' is used to match against file names\n");
	fprintf(stderr, "-G: output the (whole) files that contain a match\n");
	fprintf(stderr, "-H 'dir': the glimpse index is located in directory 'dir'\n");
	fprintf(stderr, "-L 'num': limit the output to 'num' records only\n");
	fprintf(stderr, "\n");
	fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
	return -1;	/* useful if we make glimpse into a library */
}
/*
 * usageS: print the glimpse server version banner and the synopsis of the
 * server's command-line options (-H, -J, -K) onto stderr.
 *
 * Always returns -1 (useful if we make glimpse into a library).
 */
int
usageS()
{
	fprintf(stderr, "\nThis is glimpse server version %s, %s.\n\n", GLIMPSE_VERSION, GLIMPSE_DATE);
	fprintf(stderr, "usage:  %s [-H dir] [-J host] [-K port]", GProgname);
	fprintf(stderr, "\n");
	fprintf(stderr, "-H 'dir': the glimpse index is located in directory 'dir'\n");
	fprintf(stderr, "-J 'host': the host name (string) clients must use / server runs on \n");
	fprintf(stderr, "-K 'port': the port (short integer) clients must use / server runs on \n");
	fprintf(stderr, "\n");
	fprintf(stderr, "For questions about glimpse, please contact `%s'\n", GLIMPSE_EMAIL);
	return -1;	/* useful if we make glimpse into a library */
}
#if CLIENTSERVER
/*
 * do_select() - based on select_loop() from the Harvest Broker.
 *	-- Courtesy: Darren Hardy, hardy@cs.colorado.edu
 *
 * Wait for at most sec seconds until sock is reported ready for reading
 * by select().
 *
 * Returns:
 *	 1  sock is ready for reading
 *	 0  the wait timed out, select() was interrupted (EINTR), or sock/sec
 *	    is negative
 *	-1  select() failed for another reason, or sock cannot be placed in an
 *	    fd_set (sock >= FD_SETSIZE); "select: <reason>" is printed on stderr
 */
int do_select(sock, sec)
	int	sock;	/* the socket to wait for */
	int	sec;	/* the number of seconds to wait */
{
	struct timeval	to;
	fd_set	qready;
	int	err;

	if (sock < 0 || sec < 0)
		return 0;
	/* FD_SET() is undefined for descriptors >= FD_SETSIZE: report it like a failed select() */
	if (sock >= FD_SETSIZE) {
		errno = EBADF;
		perror("select");
		return -1;
	}
	FD_ZERO(&qready);
	FD_SET(sock, &qready);
	to.tv_sec = sec;
	to.tv_usec = 0;
	if ((err = select(sock + 1, &qready, NULL, NULL, &to)) < 0) {
		if (errno == EINTR)
			return 0;
		perror("select");
		return -1;
	}
	if (err == 0)
		return 0;
	/* If there's someone waiting to get it, let them through */
	return (FD_ISSET(sock, &qready) ? 1 : 0);
}
#endif /* CLIENTSERVER */